From 0793a61d4df8daeac6492dbf8d2f3e5713caae5e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 4 Dec 2008 20:12:29 +0100 Subject: performance counters: core code Implement the core kernel bits of Performance Counters subsystem. The Linux Performance Counter subsystem provides an abstraction of performance counter hardware capabilities. It provides per task and per CPU counters, and it provides event capabilities on top of those. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. The special file descriptor is opened via the perf_counter_open() system call: int perf_counter_open(u32 hw_event_type, u32 hw_event_period, u32 record_type, pid_t pid, int cpu); The syscall returns the new fd. The fd can be used via the normal VFS system calls: read() can be used to read the counter, fcntl() can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. See more details in Documentation/perf-counters.txt. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index ce0d9da..52146c2 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -244,6 +245,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty) struct pt_regs *regs = get_irq_regs(); if (regs) show_regs(regs); + perf_counter_print_debug(); } static struct sysrq_key_op sysrq_showregs_op = { .handler = sysrq_handle_showregs, diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 0000000..22c4469 --- /dev/null +++ b/include/linux/perf_counter.h @@ -0,0 +1,171 @@ +/* + * Performance counters: + * + * Copyright(C) 2008, Thomas Gleixner + * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _LINUX_PERF_COUNTER_H +#define _LINUX_PERF_COUNTER_H + +#include + +#include +#include +#include +#include +#include + +struct task_struct; + +/* + * Generalized hardware event types, used by the hw_event_type parameter + * of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + PERF_COUNT_CYCLES, + PERF_COUNT_INSTRUCTIONS, + PERF_COUNT_CACHE_REFERENCES, + PERF_COUNT_CACHE_MISSES, + PERF_COUNT_BRANCH_INSTRUCTIONS, + PERF_COUNT_BRANCH_MISSES, + /* + * If this bit is set in the type, then trigger NMI sampling: + */ + PERF_COUNT_NMI = (1 << 30), +}; + +/* + * IRQ-notification data record type: + */ +enum perf_record_type { + PERF_RECORD_SIMPLE, + PERF_RECORD_IRQ, + PERF_RECORD_GROUP, +}; + +/** + * struct hw_perf_counter - performance counter hardware details + */ +struct hw_perf_counter { + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + u64 prev_count; + s32 next_count; + u64 irq_period; +}; + +/* + * Hardcoded buffer length limit for now, for IRQ-fed events: + */ +#define PERF_DATA_BUFLEN 2048 + +/** + * struct perf_data - performance counter IRQ data sampling ... + */ +struct perf_data { + int len; + int rd_idx; + int overrun; + u8 data[PERF_DATA_BUFLEN]; +}; + +/** + * struct perf_counter - performance counter kernel representation: + */ +struct perf_counter { + struct list_head list; + int active; +#if BITS_PER_LONG == 64 + atomic64_t count; +#else + atomic_t count32[2]; +#endif + u64 __irq_period; + + struct hw_perf_counter hw; + + struct perf_counter_context *ctx; + struct task_struct *task; + + /* + * Protect attach/detach: + */ + struct mutex mutex; + + int oncpu; + int cpu; + + s32 hw_event_type; + enum perf_record_type record_type; + + /* read() / irq related data */ + wait_queue_head_t waitq; + /* optional: for NMIs */ + int wakeup_pending; + struct perf_data *irqdata; + struct perf_data *usrdata; + struct perf_data data[2]; +}; + +/** + * struct perf_counter_context - counter context structure + * + * Used as a container for task counters and CPU counters as well: + */ +struct perf_counter_context { +#ifdef CONFIG_PERF_COUNTERS + /* + * Protect the list of counters: + */ + spinlock_t lock; + struct list_head counters; + int nr_counters; + int nr_active; + struct task_struct *task; +#endif +}; + +/** + * struct perf_counter_cpu_context - per cpu counter context structure + */ +struct perf_cpu_context { + struct perf_counter_context ctx; + struct perf_counter_context *task_ctx; + int active_oncpu; + int max_pertask; +}; + +/* + * Set by architecture code: + */ +extern int perf_max_counters; + +#ifdef CONFIG_PERF_COUNTERS +extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); +extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); +extern void perf_counter_task_tick(struct task_struct *task, int cpu); +extern void perf_counter_init_task(struct task_struct *task); +extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_print_debug(void); +#else +static inline void +perf_counter_task_sched_in(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_sched_out(struct task_struct *task, int cpu) { } +static inline void +perf_counter_task_tick(struct task_struct *task, int cpu) { } +static inline void perf_counter_init_task(struct task_struct *task) { } +static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_print_debug(void) { } +#endif + +#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 55e30d1..4c53027 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -71,6 +71,7 @@ struct sched_param { #include #include #include +#include #include #include #include @@ -1326,6 +1327,7 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif + struct perf_counter_context perf_counter_ctx; #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; @@ -2285,6 +2287,13 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif +/* + * Call the function if the target task is executing on a CPU right now: + */ +extern void task_oncpu_function_call(struct task_struct *p, + void (*func) (void *info), void *info); + + #ifdef CONFIG_MM_OWNER extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 04fb47b..6cce728 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -624,4 +624,10 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +asmlinkage int +sys_perf_counter_open(u32 hw_event_type, + u32 hw_event_period, + u32 record_type, + pid_t pid, + int cpu); #endif diff --git a/init/Kconfig b/init/Kconfig index f763762..78bede2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -732,6 +732,35 @@ config AIO by some high performance threaded applications. Disabling this option saves about 7k. +config HAVE_PERF_COUNTERS + bool + +menu "Performance Counters" + +config PERF_COUNTERS + bool "Kernel Performance Counters" + depends on HAVE_PERF_COUNTERS + default y + help + Enable kernel support for performance counter hardware. + + Performance counters are special hardware registers available + on most modern CPUs. These registers count the number of certain + types of hw events: such as instructions executed, cachemisses + suffered, or branches mis-predicted - without slowing down the + kernel or applications. These registers can also trigger interrupts + when a threshold number of events have passed - and can thus be + used to profile the code that runs on that CPU. + + The Linux Performance Counter subsystem provides an abstraction of + these hardware capabilities, available via a system call. It + provides per task and per CPU counters, and it provides event + capabilities on top of those. + + Say Y if unsure. + +endmenu + config VM_EVENT_COUNTERS default y bool "Enable VM event counters for /proc/vmstat" if EMBEDDED diff --git a/kernel/Makefile b/kernel/Makefile index 19fad00..1f184a1 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -89,6 +89,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/fork.c b/kernel/fork.c index 2a372a0..441fadf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -975,6 +975,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto fork_out; rt_mutex_init_task(p); + perf_counter_init_task(p); #ifdef CONFIG_PROVE_LOCKING DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c new file mode 100644 index 0000000..20508f0 --- /dev/null +++ b/kernel/perf_counter.c @@ -0,0 +1,943 @@ +/* + * Performance counter core code + * + * Copyright(C) 2008 Thomas Gleixner + * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Each CPU has a list of per CPU counters: + */ +DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); + +int perf_max_counters __read_mostly; +static int perf_reserved_percpu __read_mostly; +static int perf_overcommit __read_mostly = 1; + +/* + * Mutex for (sysadmin-configurable) counter reservations: + */ +static DEFINE_MUTEX(perf_resource_mutex); + +/* + * Architecture provided APIs - weak aliases: + */ + +int __weak hw_perf_counter_init(struct perf_counter *counter, u32 hw_event_type) +{ + return -EINVAL; +} + +void __weak hw_perf_counter_enable(struct perf_counter *counter) { } +void __weak hw_perf_counter_disable(struct perf_counter *counter) { } +void __weak hw_perf_counter_read(struct perf_counter *counter) { } +void __weak hw_perf_disable_all(void) { } +void __weak hw_perf_enable_all(void) { } +void __weak hw_perf_counter_setup(void) { } + +#if BITS_PER_LONG == 64 + +/* + * Read the cached counter in counter safe against cross CPU / NMI + * modifications. 64 bit version - no complications. + */ +static inline u64 perf_read_counter_safe(struct perf_counter *counter) +{ + return (u64) atomic64_read(&counter->count); +} + +#else + +/* + * Read the cached counter in counter safe against cross CPU / NMI + * modifications. 32 bit version. + */ +static u64 perf_read_counter_safe(struct perf_counter *counter) +{ + u32 cntl, cnth; + + local_irq_disable(); + do { + cnth = atomic_read(&counter->count32[1]); + cntl = atomic_read(&counter->count32[0]); + } while (cnth != atomic_read(&counter->count32[1])); + + local_irq_enable(); + + return cntl | ((u64) cnth) << 32; +} + +#endif + +/* + * Cross CPU call to remove a performance counter + * + * We disable the counter on the hardware level first. After that we + * remove it from the context list. + */ +static void __perf_remove_from_context(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter *counter = info; + struct perf_counter_context *ctx = counter->ctx; + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. + */ + if (ctx->task && cpuctx->task_ctx != ctx) + return; + + spin_lock(&ctx->lock); + + if (counter->active) { + hw_perf_counter_disable(counter); + counter->active = 0; + ctx->nr_active--; + cpuctx->active_oncpu--; + counter->task = NULL; + } + ctx->nr_counters--; + + /* + * Protect the list operation against NMI by disabling the + * counters on a global level. NOP for non NMI based counters. + */ + hw_perf_disable_all(); + list_del_init(&counter->list); + hw_perf_enable_all(); + + if (!ctx->task) { + /* + * Allow more per task counters with respect to the + * reservation: + */ + cpuctx->max_pertask = + min(perf_max_counters - ctx->nr_counters, + perf_max_counters - perf_reserved_percpu); + } + + spin_unlock(&ctx->lock); +} + + +/* + * Remove the counter from a task's (or a CPU's) list of counters. + * + * Must be called with counter->mutex held. + * + * CPU counters are removed with a smp call. For task counters we only + * call when the task is on a CPU. + */ +static void perf_remove_from_context(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Per cpu counters are removed via an smp call and + * the removal is always sucessful. + */ + smp_call_function_single(counter->cpu, + __perf_remove_from_context, + counter, 1); + return; + } + +retry: + task_oncpu_function_call(task, __perf_remove_from_context, + counter); + + spin_lock_irq(&ctx->lock); + /* + * If the context is active we need to retry the smp call. + */ + if (ctx->nr_active && !list_empty(&counter->list)) { + spin_unlock_irq(&ctx->lock); + goto retry; + } + + /* + * The lock prevents that this context is scheduled in so we + * can remove the counter safely, if it the call above did not + * succeed. + */ + if (!list_empty(&counter->list)) { + ctx->nr_counters--; + list_del_init(&counter->list); + counter->task = NULL; + } + spin_unlock_irq(&ctx->lock); +} + +/* + * Cross CPU call to install and enable a preformance counter + */ +static void __perf_install_in_context(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter *counter = info; + struct perf_counter_context *ctx = counter->ctx; + int cpu = smp_processor_id(); + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. + */ + if (ctx->task && cpuctx->task_ctx != ctx) + return; + + spin_lock(&ctx->lock); + + /* + * Protect the list operation against NMI by disabling the + * counters on a global level. NOP for non NMI based counters. + */ + hw_perf_disable_all(); + list_add_tail(&counter->list, &ctx->counters); + hw_perf_enable_all(); + + ctx->nr_counters++; + + if (cpuctx->active_oncpu < perf_max_counters) { + hw_perf_counter_enable(counter); + counter->active = 1; + counter->oncpu = cpu; + ctx->nr_active++; + cpuctx->active_oncpu++; + } + + if (!ctx->task && cpuctx->max_pertask) + cpuctx->max_pertask--; + + spin_unlock(&ctx->lock); +} + +/* + * Attach a performance counter to a context + * + * First we add the counter to the list with the hardware enable bit + * in counter->hw_config cleared. + * + * If the counter is attached to a task which is on a CPU we use a smp + * call to enable it in the task context. The task might have been + * scheduled away, but we check this in the smp call again. + */ +static void +perf_install_in_context(struct perf_counter_context *ctx, + struct perf_counter *counter, + int cpu) +{ + struct task_struct *task = ctx->task; + + counter->ctx = ctx; + if (!task) { + /* + * Per cpu counters are installed via an smp call and + * the install is always sucessful. + */ + smp_call_function_single(cpu, __perf_install_in_context, + counter, 1); + return; + } + + counter->task = task; +retry: + task_oncpu_function_call(task, __perf_install_in_context, + counter); + + spin_lock_irq(&ctx->lock); + /* + * If the context is active and the counter has not been added + * we need to retry the smp call. + */ + if (ctx->nr_active && list_empty(&counter->list)) { + spin_unlock_irq(&ctx->lock); + goto retry; + } + + /* + * The lock prevents that this context is scheduled in so we + * can add the counter safely, if it the call above did not + * succeed. + */ + if (list_empty(&counter->list)) { + list_add_tail(&counter->list, &ctx->counters); + ctx->nr_counters++; + } + spin_unlock_irq(&ctx->lock); +} + +/* + * Called from scheduler to remove the counters of the current task, + * with interrupts disabled. + * + * We stop each counter and update the counter value in counter->count. + * + * This does not protect us against NMI, but hw_perf_counter_disable() + * sets the disabled bit in the control field of counter _before_ + * accessing the counter control register. If a NMI hits, then it will + * not restart the counter. + */ +void perf_counter_task_sched_out(struct task_struct *task, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct perf_counter *counter; + + if (likely(!cpuctx->task_ctx)) + return; + + spin_lock(&ctx->lock); + list_for_each_entry(counter, &ctx->counters, list) { + if (!ctx->nr_active) + break; + if (counter->active) { + hw_perf_counter_disable(counter); + counter->active = 0; + counter->oncpu = -1; + ctx->nr_active--; + cpuctx->active_oncpu--; + } + } + spin_unlock(&ctx->lock); + cpuctx->task_ctx = NULL; +} + +/* + * Called from scheduler to add the counters of the current task + * with interrupts disabled. + * + * We restore the counter value and then enable it. + * + * This does not protect us against NMI, but hw_perf_counter_enable() + * sets the enabled bit in the control field of counter _before_ + * accessing the counter control register. If a NMI hits, then it will + * keep the counter running. + */ +void perf_counter_task_sched_in(struct task_struct *task, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct perf_counter *counter; + + if (likely(!ctx->nr_counters)) + return; + + spin_lock(&ctx->lock); + list_for_each_entry(counter, &ctx->counters, list) { + if (ctx->nr_active == cpuctx->max_pertask) + break; + if (counter->cpu != -1 && counter->cpu != cpu) + continue; + + hw_perf_counter_enable(counter); + counter->active = 1; + counter->oncpu = cpu; + ctx->nr_active++; + cpuctx->active_oncpu++; + } + spin_unlock(&ctx->lock); + cpuctx->task_ctx = ctx; +} + +void perf_counter_task_tick(struct task_struct *curr, int cpu) +{ + struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter *counter; + + if (likely(!ctx->nr_counters)) + return; + + perf_counter_task_sched_out(curr, cpu); + + spin_lock(&ctx->lock); + + /* + * Rotate the first entry last: + */ + hw_perf_disable_all(); + list_for_each_entry(counter, &ctx->counters, list) { + list_del(&counter->list); + list_add_tail(&counter->list, &ctx->counters); + break; + } + hw_perf_enable_all(); + + spin_unlock(&ctx->lock); + + perf_counter_task_sched_in(curr, cpu); +} + +/* + * Initialize the perf_counter context in task_struct + */ +void perf_counter_init_task(struct task_struct *task) +{ + struct perf_counter_context *ctx = &task->perf_counter_ctx; + + spin_lock_init(&ctx->lock); + INIT_LIST_HEAD(&ctx->counters); + ctx->nr_counters = 0; + ctx->task = task; +} + +/* + * Cross CPU call to read the hardware counter + */ +static void __hw_perf_counter_read(void *info) +{ + hw_perf_counter_read(info); +} + +static u64 perf_read_counter(struct perf_counter *counter) +{ + /* + * If counter is enabled and currently active on a CPU, update the + * value in the counter structure: + */ + if (counter->active) { + smp_call_function_single(counter->oncpu, + __hw_perf_counter_read, counter, 1); + } + + return perf_read_counter_safe(counter); +} + +/* + * Cross CPU call to switch performance data pointers + */ +static void __perf_switch_irq_data(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter *counter = info; + struct perf_counter_context *ctx = counter->ctx; + struct perf_data *oldirqdata = counter->irqdata; + + /* + * If this is a task context, we need to check whether it is + * the current task context of this cpu. If not it has been + * scheduled out before the smp call arrived. + */ + if (ctx->task) { + if (cpuctx->task_ctx != ctx) + return; + spin_lock(&ctx->lock); + } + + /* Change the pointer NMI safe */ + atomic_long_set((atomic_long_t *)&counter->irqdata, + (unsigned long) counter->usrdata); + counter->usrdata = oldirqdata; + + if (ctx->task) + spin_unlock(&ctx->lock); +} + +static struct perf_data *perf_switch_irq_data(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + struct perf_data *oldirqdata = counter->irqdata; + struct task_struct *task = ctx->task; + + if (!task) { + smp_call_function_single(counter->cpu, + __perf_switch_irq_data, + counter, 1); + return counter->usrdata; + } + +retry: + spin_lock_irq(&ctx->lock); + if (!counter->active) { + counter->irqdata = counter->usrdata; + counter->usrdata = oldirqdata; + spin_unlock_irq(&ctx->lock); + return oldirqdata; + } + spin_unlock_irq(&ctx->lock); + task_oncpu_function_call(task, __perf_switch_irq_data, counter); + /* Might have failed, because task was scheduled out */ + if (counter->irqdata == oldirqdata) + goto retry; + + return counter->usrdata; +} + +static void put_context(struct perf_counter_context *ctx) +{ + if (ctx->task) + put_task_struct(ctx->task); +} + +static struct perf_counter_context *find_get_context(pid_t pid, int cpu) +{ + struct perf_cpu_context *cpuctx; + struct perf_counter_context *ctx; + struct task_struct *task; + + /* + * If cpu is not a wildcard then this is a percpu counter: + */ + if (cpu != -1) { + /* Must be root to operate on a CPU counter: */ + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EACCES); + + if (cpu < 0 || cpu > num_possible_cpus()) + return ERR_PTR(-EINVAL); + + /* + * We could be clever and allow to attach a counter to an + * offline CPU and activate it when the CPU comes up, but + * that's for later. + */ + if (!cpu_isset(cpu, cpu_online_map)) + return ERR_PTR(-ENODEV); + + cpuctx = &per_cpu(perf_cpu_context, cpu); + ctx = &cpuctx->ctx; + + WARN_ON_ONCE(ctx->task); + return ctx; + } + + rcu_read_lock(); + if (!pid) + task = current; + else + task = find_task_by_vpid(pid); + if (task) + get_task_struct(task); + rcu_read_unlock(); + + if (!task) + return ERR_PTR(-ESRCH); + + ctx = &task->perf_counter_ctx; + ctx->task = task; + + /* Reuse ptrace permission checks for now. */ + if (!ptrace_may_access(task, PTRACE_MODE_READ)) { + put_context(ctx); + return ERR_PTR(-EACCES); + } + + return ctx; +} + +/* + * Called when the last reference to the file is gone. + */ +static int perf_release(struct inode *inode, struct file *file) +{ + struct perf_counter *counter = file->private_data; + struct perf_counter_context *ctx = counter->ctx; + + file->private_data = NULL; + + mutex_lock(&counter->mutex); + + perf_remove_from_context(counter); + put_context(ctx); + + mutex_unlock(&counter->mutex); + + kfree(counter); + + return 0; +} + +/* + * Read the performance counter - simple non blocking version for now + */ +static ssize_t +perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) +{ + u64 cntval; + + if (count != sizeof(cntval)) + return -EINVAL; + + mutex_lock(&counter->mutex); + cntval = perf_read_counter(counter); + mutex_unlock(&counter->mutex); + + return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); +} + +static ssize_t +perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count) +{ + if (!usrdata->len) + return 0; + + count = min(count, (size_t)usrdata->len); + if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count)) + return -EFAULT; + + /* Adjust the counters */ + usrdata->len -= count; + if (!usrdata->len) + usrdata->rd_idx = 0; + else + usrdata->rd_idx += count; + + return count; +} + +static ssize_t +perf_read_irq_data(struct perf_counter *counter, + char __user *buf, + size_t count, + int nonblocking) +{ + struct perf_data *irqdata, *usrdata; + DECLARE_WAITQUEUE(wait, current); + ssize_t res; + + irqdata = counter->irqdata; + usrdata = counter->usrdata; + + if (usrdata->len + irqdata->len >= count) + goto read_pending; + + if (nonblocking) + return -EAGAIN; + + spin_lock_irq(&counter->waitq.lock); + __add_wait_queue(&counter->waitq, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (usrdata->len + irqdata->len >= count) + break; + + if (signal_pending(current)) + break; + + spin_unlock_irq(&counter->waitq.lock); + schedule(); + spin_lock_irq(&counter->waitq.lock); + } + __remove_wait_queue(&counter->waitq, &wait); + __set_current_state(TASK_RUNNING); + spin_unlock_irq(&counter->waitq.lock); + + if (usrdata->len + irqdata->len < count) + return -ERESTARTSYS; +read_pending: + mutex_lock(&counter->mutex); + + /* Drain pending data first: */ + res = perf_copy_usrdata(usrdata, buf, count); + if (res < 0 || res == count) + goto out; + + /* Switch irq buffer: */ + usrdata = perf_switch_irq_data(counter); + if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) { + if (!res) + res = -EFAULT; + } else { + res = count; + } +out: + mutex_unlock(&counter->mutex); + + return res; +} + +static ssize_t +perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) +{ + struct perf_counter *counter = file->private_data; + + switch (counter->record_type) { + case PERF_RECORD_SIMPLE: + return perf_read_hw(counter, buf, count); + + case PERF_RECORD_IRQ: + case PERF_RECORD_GROUP: + return perf_read_irq_data(counter, buf, count, + file->f_flags & O_NONBLOCK); + } + return -EINVAL; +} + +static unsigned int perf_poll(struct file *file, poll_table *wait) +{ + struct perf_counter *counter = file->private_data; + unsigned int events = 0; + unsigned long flags; + + poll_wait(file, &counter->waitq, wait); + + spin_lock_irqsave(&counter->waitq.lock, flags); + if (counter->usrdata->len || counter->irqdata->len) + events |= POLLIN; + spin_unlock_irqrestore(&counter->waitq.lock, flags); + + return events; +} + +static const struct file_operations perf_fops = { + .release = perf_release, + .read = perf_read, + .poll = perf_poll, +}; + +/* + * Allocate and initialize a counter structure + */ +static struct perf_counter * +perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type) +{ + struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); + + if (!counter) + return NULL; + + mutex_init(&counter->mutex); + INIT_LIST_HEAD(&counter->list); + init_waitqueue_head(&counter->waitq); + + counter->irqdata = &counter->data[0]; + counter->usrdata = &counter->data[1]; + counter->cpu = cpu; + counter->record_type = record_type; + counter->__irq_period = hw_event_period; + counter->wakeup_pending = 0; + + return counter; +} + +/** + * sys_perf_task_open - open a performance counter associate it to a task + * @hw_event_type: event type for monitoring/sampling... + * @pid: target pid + */ +asmlinkage int +sys_perf_counter_open(u32 hw_event_type, + u32 hw_event_period, + u32 record_type, + pid_t pid, + int cpu) +{ + struct perf_counter_context *ctx; + struct perf_counter *counter; + int ret; + + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = -ENOMEM; + counter = perf_counter_alloc(hw_event_period, cpu, record_type); + if (!counter) + goto err_put_context; + + ret = hw_perf_counter_init(counter, hw_event_type); + if (ret) + goto err_free_put_context; + + perf_install_in_context(ctx, counter, cpu); + + ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); + if (ret < 0) + goto err_remove_free_put_context; + + return ret; + +err_remove_free_put_context: + mutex_lock(&counter->mutex); + perf_remove_from_context(counter); + mutex_unlock(&counter->mutex); + +err_free_put_context: + kfree(counter); + +err_put_context: + put_context(ctx); + + return ret; +} + +static void __cpuinit perf_init_cpu(int cpu) +{ + struct perf_cpu_context *ctx; + + ctx = &per_cpu(perf_cpu_context, cpu); + spin_lock_init(&ctx->ctx.lock); + INIT_LIST_HEAD(&ctx->ctx.counters); + + mutex_lock(&perf_resource_mutex); + ctx->max_pertask = perf_max_counters - perf_reserved_percpu; + mutex_unlock(&perf_resource_mutex); + hw_perf_counter_setup(); +} + +#ifdef CONFIG_HOTPLUG_CPU +static void __perf_exit_cpu(void *info) +{ + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter_context *ctx = &cpuctx->ctx; + struct perf_counter *counter, *tmp; + + list_for_each_entry_safe(counter, tmp, &ctx->counters, list) + __perf_remove_from_context(counter); + +} +static void perf_exit_cpu(int cpu) +{ + smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1); +} +#else +static inline void perf_exit_cpu(int cpu) { } +#endif + +static int __cpuinit +perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action) { + + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + perf_init_cpu(cpu); + break; + + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + perf_exit_cpu(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata perf_cpu_nb = { + .notifier_call = perf_cpu_notify, +}; + +static int __init perf_counter_init(void) +{ + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + register_cpu_notifier(&perf_cpu_nb); + + return 0; +} +early_initcall(perf_counter_init); + +static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) +{ + return sprintf(buf, "%d\n", perf_reserved_percpu); +} + +static ssize_t +perf_set_reserve_percpu(struct sysdev_class *class, + const char *buf, + size_t count) +{ + struct perf_cpu_context *cpuctx; + unsigned long val; + int err, cpu, mpt; + + err = strict_strtoul(buf, 10, &val); + if (err) + return err; + if (val > perf_max_counters) + return -EINVAL; + + mutex_lock(&perf_resource_mutex); + perf_reserved_percpu = val; + for_each_online_cpu(cpu) { + cpuctx = &per_cpu(perf_cpu_context, cpu); + spin_lock_irq(&cpuctx->ctx.lock); + mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, + perf_max_counters - perf_reserved_percpu); + cpuctx->max_pertask = mpt; + spin_unlock_irq(&cpuctx->ctx.lock); + } + mutex_unlock(&perf_resource_mutex); + + return count; +} + +static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) +{ + return sprintf(buf, "%d\n", perf_overcommit); +} + +static ssize_t +perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) +{ + unsigned long val; + int err; + + err = strict_strtoul(buf, 10, &val); + if (err) + return err; + if (val > 1) + return -EINVAL; + + mutex_lock(&perf_resource_mutex); + perf_overcommit = val; + mutex_unlock(&perf_resource_mutex); + + return count; +} + +static SYSDEV_CLASS_ATTR( + reserve_percpu, + 0644, + perf_show_reserve_percpu, + perf_set_reserve_percpu + ); + +static SYSDEV_CLASS_ATTR( + overcommit, + 0644, + perf_show_overcommit, + perf_set_overcommit + ); + +static struct attribute *perfclass_attrs[] = { + &attr_reserve_percpu.attr, + &attr_overcommit.attr, + NULL +}; + +static struct attribute_group perfclass_attr_group = { + .attrs = perfclass_attrs, + .name = "perf_counters", +}; + +static int __init perf_counter_sysfs_init(void) +{ + return sysfs_create_group(&cpu_sysdev_class.kset.kobj, + &perfclass_attr_group); +} +device_initcall(perf_counter_sysfs_init); + diff --git a/kernel/sched.c b/kernel/sched.c index b7480fb..254d56d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2212,6 +2212,27 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ +/** + * task_oncpu_function_call - call a function on the cpu on which a task runs + * @p: the task to evaluate + * @func: the function to be called + * @info: the function call argument + * + * Calls the function @func when the task is currently running. This might + * be on the current CPU, which just calls the function directly + */ +void task_oncpu_function_call(struct task_struct *p, + void (*func) (void *info), void *info) +{ + int cpu; + + preempt_disable(); + cpu = task_cpu(p); + if (task_curr(p)) + smp_call_function_single(cpu, func, info, 1); + preempt_enable(); +} + /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -2534,6 +2555,7 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { fire_sched_out_preempt_notifiers(prev, next); + perf_counter_task_sched_out(prev, cpu_of(rq)); prepare_lock_switch(rq, next); prepare_arch_switch(next); } @@ -2574,6 +2596,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); + perf_counter_task_sched_in(current, cpu_of(rq)); finish_lock_switch(rq, prev); #ifdef CONFIG_SMP if (current->sched_class->post_schedule) @@ -4296,6 +4319,7 @@ void scheduler_tick(void) rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif + perf_counter_task_tick(curr, cpu); } #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e14a232..4be8bbc 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -174,3 +174,6 @@ cond_syscall(compat_sys_timerfd_settime); cond_syscall(compat_sys_timerfd_gettime); cond_syscall(sys_eventfd); cond_syscall(sys_eventfd2); + +/* performance counters: */ +cond_syscall(sys_perf_counter_open); -- cgit v0.10.2 From e7bc62b6b3aeaa8849f8383e0cfb7ca6c003adc6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 4 Dec 2008 20:13:45 +0100 Subject: performance counters: documentation Add more documentation about performance counters. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt new file mode 100644 index 0000000..19033a0 --- /dev/null +++ b/Documentation/perf-counters.txt @@ -0,0 +1,104 @@ + +Performance Counters for Linux +------------------------------ + +Performance counters are special hardware registers available on most modern +CPUs. These registers count the number of certain types of hw events: such +as instructions executed, cachemisses suffered, or branches mis-predicted - +without slowing down the kernel or applications. These registers can also +trigger interrupts when a threshold number of events have passed - and can +thus be used to profile the code that runs on that CPU. + +The Linux Performance Counter subsystem provides an abstraction of these +hardware capabilities. It provides per task and per CPU counters, and +it provides event capabilities on top of those. + +Performance counters are accessed via special file descriptors. +There's one file descriptor per virtual counter used. + +The special file descriptor is opened via the perf_counter_open() +system call: + + int + perf_counter_open(u32 hw_event_type, + u32 hw_event_period, + u32 record_type, + pid_t pid, + int cpu); + +The syscall returns the new fd. The fd can be used via the normal +VFS system calls: read() can be used to read the counter, fcntl() +can be used to set the blocking mode, etc. + +Multiple counters can be kept open at a time, and the counters +can be poll()ed. + +When creating a new counter fd, 'hw_event_type' is one of: + + enum hw_event_types { + PERF_COUNT_CYCLES, + PERF_COUNT_INSTRUCTIONS, + PERF_COUNT_CACHE_REFERENCES, + PERF_COUNT_CACHE_MISSES, + PERF_COUNT_BRANCH_INSTRUCTIONS, + PERF_COUNT_BRANCH_MISSES, + }; + +These are standardized types of events that work uniformly on all CPUs +that implements Performance Counters support under Linux. If a CPU is +not able to count branch-misses, then the system call will return +-EINVAL. + +[ Note: more hw_event_types are supported as well, but they are CPU + specific and are enumerated via /sys on a per CPU basis. Raw hw event + types can be passed in as negative numbers. For example, to count + "External bus cycles while bus lock signal asserted" events on Intel + Core CPUs, pass in a -0x4064 event type value. ] + +The parameter 'hw_event_period' is the number of events before waking up +a read() that is blocked on a counter fd. Zero value means a non-blocking +counter. + +'record_type' is the type of data that a read() will provide for the +counter, and it can be one of: + + enum perf_record_type { + PERF_RECORD_SIMPLE, + PERF_RECORD_IRQ, + }; + +a "simple" counter is one that counts hardware events and allows +them to be read out into a u64 count value. (read() returns 8 on +a successful read of a simple counter.) + +An "irq" counter is one that will also provide an IRQ context information: +the IP of the interrupted context. In this case read() will return +the 8-byte counter value, plus the Instruction Pointer address of the +interrupted context. + +The 'pid' parameter allows the counter to be specific to a task: + + pid == 0: if the pid parameter is zero, the counter is attached to the + current task. + + pid > 0: the counter is attached to a specific task (if the current task + has sufficient privilege to do so) + + pid < 0: all tasks are counted (per cpu counters) + +The 'cpu' parameter allows a counter to be made specific to a full +CPU: + + cpu >= 0: the counter is restricted to a specific CPU + cpu == -1: the counter counts on all CPUs + +Note: the combination of 'pid == -1' and 'cpu == -1' is not valid. + +A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts +events of that task and 'follows' that task to whatever CPU the task +gets schedule to. Per task counters can be created by any user, for +their own tasks. + +A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts +all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. + -- cgit v0.10.2 From 241771ef016b5c0c83cd7a4372a74321c973c1e6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 3 Dec 2008 10:39:53 +0100 Subject: performance counters: x86 support Implement performance counters for x86 Intel CPUs. It's simplified right now: the PERFMON CPU feature is assumed, which is available in Core2 and later Intel CPUs. The design is flexible to be extended to more CPU types as well. Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d4d4cb7..f2fdc18 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -643,6 +643,7 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) + select HAVE_PERF_COUNTERS config X86_IO_APIC def_bool y diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 256b00b..3c14ed0 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -823,7 +823,8 @@ ia32_sys_call_table: .quad compat_sys_signalfd4 .quad sys_eventfd2 .quad sys_epoll_create1 - .quad sys_dup3 /* 330 */ + .quad sys_dup3 /* 330 */ .quad sys_pipe2 .quad sys_inotify_init1 + .quad sys_perf_counter_open ia32_syscall_end: diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index 5ca135e..b3e475d 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -9,6 +9,7 @@ typedef struct { unsigned long idle_timestamp; unsigned int __nmi_count; /* arch dependent */ unsigned int apic_timer_irqs; /* arch dependent */ + unsigned int apic_perf_irqs; /* arch dependent */ unsigned int irq0_irqs; unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 8de644b..aa93e53 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -30,6 +30,8 @@ /* Interrupt handlers registered during init_IRQ */ extern void apic_timer_interrupt(void); extern void error_interrupt(void); +extern void perf_counter_interrupt(void); + extern void spurious_interrupt(void); extern void thermal_interrupt(void); extern void reschedule_interrupt(void); diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h index fa0fd06..71598a9 100644 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ b/arch/x86/include/asm/intel_arch_perfmon.h @@ -1,22 +1,24 @@ #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H #define _ASM_X86_INTEL_ARCH_PERFMON_H -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 union cpuid10_eax { struct { @@ -28,4 +30,12 @@ union cpuid10_eax { unsigned int full; }; +#ifdef CONFIG_PERF_COUNTERS +extern void init_hw_perf_counters(void); +extern void perf_counters_lapic_init(int nmi); +#else +static inline void init_hw_perf_counters(void) { } +static inline void perf_counters_lapic_init(int nmi) { } +#endif + #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb..b8d277f 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -87,6 +87,11 @@ #define LOCAL_TIMER_VECTOR 0xef /* + * Performance monitoring interrupt vector: + */ +#define LOCAL_PERF_VECTOR 0xee + +/* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h index 6b1add8..ad31e5d 100644 --- a/arch/x86/include/asm/mach-default/entry_arch.h +++ b/arch/x86/include/asm/mach-default/entry_arch.h @@ -25,10 +25,15 @@ BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) * a much simpler SMP time architecture: */ #ifdef CONFIG_X86_LOCAL_APIC + BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +#ifdef CONFIG_PERF_COUNTERS +BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +#endif + #ifdef CONFIG_X86_MCE_P4THERMAL BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #endif diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h index 2fbfff8..90a8d9d 100644 --- a/arch/x86/include/asm/pda.h +++ b/arch/x86/include/asm/pda.h @@ -30,6 +30,7 @@ struct x8664_pda { short isidle; struct mm_struct *active_mm; unsigned apic_timer_irqs; + unsigned apic_perf_irqs; unsigned irq0_irqs; unsigned irq_resched_count; unsigned irq_call_count; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379..810bf26 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -80,6 +80,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ +#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -103,6 +104,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) +#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -135,7 +137,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index f2bba78..7e47658 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -338,6 +338,7 @@ #define __NR_dup3 330 #define __NR_pipe2 331 #define __NR_inotify_init1 332 +#define __NR_perf_counter_open 333 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index d2e415e..53025fe 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -653,7 +653,8 @@ __SYSCALL(__NR_dup3, sys_dup3) __SYSCALL(__NR_pipe2, sys_pipe2) #define __NR_inotify_init1 294 __SYSCALL(__NR_inotify_init1, sys_inotify_init1) - +#define __NR_perf_counter_open 295 +__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f9487..8ab8c18 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include @@ -1147,6 +1148,7 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif + perf_counters_lapic_init(0); preempt_disable(); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 82ec607..89e5336 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -1,5 +1,5 @@ # -# Makefile for x86-compatible CPU details and quirks +# Makefile for x86-compatible CPU details, features and quirks # obj-y := intel_cacheinfo.o addon_cpuid_features.o @@ -16,11 +16,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_X86_MCE) += mcheck/ -obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o +obj-$(CONFIG_X86_MCE) += mcheck/ +obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_CPU_FREQ) += cpufreq/ + +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b9c9ea0..4461011 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -750,6 +751,7 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif + init_hw_perf_counters(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c new file mode 100644 index 0000000..82440cb --- /dev/null +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -0,0 +1,571 @@ +/* + * Performance counter x86 architecture code + * + * Copyright(C) 2008 Thomas Gleixner + * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static bool perf_counters_initialized __read_mostly; + +/* + * Number of (generic) HW counters: + */ +static int nr_hw_counters __read_mostly; +static u32 perf_counter_mask __read_mostly; + +/* No support for fixed function counters yet */ + +#define MAX_HW_COUNTERS 8 + +struct cpu_hw_counters { + struct perf_counter *counters[MAX_HW_COUNTERS]; + unsigned long used[BITS_TO_LONGS(MAX_HW_COUNTERS)]; + int enable_all; +}; + +/* + * Intel PerfMon v3. Used on Core2 and later. + */ +static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); + +const int intel_perfmon_event_map[] = +{ + [PERF_COUNT_CYCLES] = 0x003c, + [PERF_COUNT_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_CACHE_MISSES] = 0x412e, + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_BRANCH_MISSES] = 0x00c5, +}; + +const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); + +/* + * Setup the hardware configuration for a given hw_event_type + */ +int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) +{ + struct hw_perf_counter *hwc = &counter->hw; + + if (unlikely(!perf_counters_initialized)) + return -EINVAL; + + /* + * Count user events, and generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT; + + /* + * If privileged enough, count OS events too, and allow + * NMI events as well: + */ + hwc->nmi = 0; + if (capable(CAP_SYS_ADMIN)) { + hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + if (hw_event_type & PERF_COUNT_NMI) + hwc->nmi = 1; + } + + hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; + hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; + + hwc->irq_period = counter->__irq_period; + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic counter period: + */ + if (!hwc->irq_period) + hwc->irq_period = 0x7FFFFFFF; + + hwc->next_count = -((s32) hwc->irq_period); + + /* + * Negative event types mean raw encoded event+umask values: + */ + if (hw_event_type < 0) { + counter->hw_event_type = -hw_event_type; + counter->hw_event_type &= ~PERF_COUNT_NMI; + } else { + hw_event_type &= ~PERF_COUNT_NMI; + if (hw_event_type >= max_intel_perfmon_events) + return -EINVAL; + /* + * The generic map: + */ + counter->hw_event_type = intel_perfmon_event_map[hw_event_type]; + } + hwc->config |= counter->hw_event_type; + counter->wakeup_pending = 0; + + return 0; +} + +static void __hw_perf_enable_all(void) +{ + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); +} + +void hw_perf_enable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + cpuc->enable_all = 1; + __hw_perf_enable_all(); +} + +void hw_perf_disable_all(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + cpuc->enable_all = 0; + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); +} + +static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]); + +static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx) +{ + per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count; + + wrmsr(hwc->counter_base + idx, hwc->next_count, 0); + wrmsr(hwc->config_base + idx, hwc->config, 0); +} + +void hw_perf_counter_enable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct hw_perf_counter *hwc = &counter->hw; + int idx = hwc->idx; + + /* Try to get the previous counter again */ + if (test_and_set_bit(idx, cpuc->used)) { + idx = find_first_zero_bit(cpuc->used, nr_hw_counters); + set_bit(idx, cpuc->used); + hwc->idx = idx; + } + + perf_counters_lapic_init(hwc->nmi); + + wrmsr(hwc->config_base + idx, + hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0); + + cpuc->counters[idx] = counter; + counter->hw.config |= ARCH_PERFMON_EVENTSEL0_ENABLE; + __hw_perf_counter_enable(hwc, idx); +} + +#ifdef CONFIG_X86_64 +static inline void atomic64_counter_set(struct perf_counter *counter, u64 val) +{ + atomic64_set(&counter->count, val); +} + +static inline u64 atomic64_counter_read(struct perf_counter *counter) +{ + return atomic64_read(&counter->count); +} +#else +/* + * Todo: add proper atomic64_t support to 32-bit x86: + */ +static inline void atomic64_counter_set(struct perf_counter *counter, u64 val64) +{ + u32 *val32 = (void *)&val64; + + atomic_set(counter->count32 + 0, *(val32 + 0)); + atomic_set(counter->count32 + 1, *(val32 + 1)); +} + +static inline u64 atomic64_counter_read(struct perf_counter *counter) +{ + return atomic_read(counter->count32 + 0) | + (u64) atomic_read(counter->count32 + 1) << 32; +} +#endif + +static void __hw_perf_save_counter(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) +{ + s64 raw = -1; + s64 delta; + int err; + + /* + * Get the raw hw counter value: + */ + err = rdmsrl_safe(hwc->counter_base + idx, &raw); + WARN_ON_ONCE(err); + + /* + * Rebase it to zero (it started counting at -irq_period), + * to see the delta since ->prev_count: + */ + delta = (s64)hwc->irq_period + (s64)(s32)raw; + + atomic64_counter_set(counter, hwc->prev_count + delta); + + /* + * Adjust the ->prev_count offset - if we went beyond + * irq_period of units, then we got an IRQ and the counter + * was set back to -irq_period: + */ + while (delta >= (s64)hwc->irq_period) { + hwc->prev_count += hwc->irq_period; + delta -= (s64)hwc->irq_period; + } + + /* + * Calculate the next raw counter value we'll write into + * the counter at the next sched-in time: + */ + delta -= (s64)hwc->irq_period; + + hwc->next_count = (s32)delta; +} + +void perf_counter_print_debug(void) +{ + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count; + int cpu, err, idx; + + local_irq_disable(); + + cpu = smp_processor_id(); + + err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_CTRL, &ctrl); + WARN_ON_ONCE(err); + + err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_STATUS, &status); + WARN_ON_ONCE(err); + + err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_OVF_CTRL, &overflow); + WARN_ON_ONCE(err); + + printk(KERN_INFO "\n"); + printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl); + printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); + printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); + + for (idx = 0; idx < nr_hw_counters; idx++) { + err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl); + WARN_ON_ONCE(err); + + err = rdmsrl_safe(MSR_ARCH_PERFMON_PERFCTR0 + idx, &pmc_count); + WARN_ON_ONCE(err); + + next_count = per_cpu(prev_next_count[idx], cpu); + + printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n", + cpu, idx, pmc_ctrl); + printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n", + cpu, idx, pmc_count); + printk(KERN_INFO "CPU#%d: PMC%d next: %016llx\n", + cpu, idx, next_count); + } + local_irq_enable(); +} + +void hw_perf_counter_disable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + struct hw_perf_counter *hwc = &counter->hw; + unsigned int idx = hwc->idx; + + counter->hw.config &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(hwc->config_base + idx, hwc->config, 0); + + clear_bit(idx, cpuc->used); + cpuc->counters[idx] = NULL; + __hw_perf_save_counter(counter, hwc, idx); +} + +void hw_perf_counter_read(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + unsigned long addr = hwc->counter_base + hwc->idx; + s64 offs, val = -1LL; + s32 val32; + int err; + + /* Careful: NMI might modify the counter offset */ + do { + offs = hwc->prev_count; + err = rdmsrl_safe(addr, &val); + WARN_ON_ONCE(err); + } while (offs != hwc->prev_count); + + val32 = (s32) val; + val = (s64)hwc->irq_period + (s64)val32; + atomic64_counter_set(counter, hwc->prev_count + val); +} + +static void perf_store_irq_data(struct perf_counter *counter, u64 data) +{ + struct perf_data *irqdata = counter->irqdata; + + if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { + irqdata->overrun++; + } else { + u64 *p = (u64 *) &irqdata->data[irqdata->len]; + + *p = data; + irqdata->len += sizeof(u64); + } +} + +static void perf_save_and_restart(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + int idx = hwc->idx; + + wrmsr(hwc->config_base + idx, + hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0); + + if (hwc->config & ARCH_PERFMON_EVENTSEL0_ENABLE) { + __hw_perf_save_counter(counter, hwc, idx); + __hw_perf_counter_enable(hwc, idx); + } +} + +static void +perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) +{ + struct perf_counter_context *ctx = leader->ctx; + struct perf_counter *counter; + int bit; + + list_for_each_entry(counter, &ctx->counters, list) { + if (counter->record_type != PERF_RECORD_SIMPLE || + counter == leader) + continue; + + if (counter->active) { + /* + * When counter was not in the overflow mask, we have to + * read it from hardware. We read it as well, when it + * has not been read yet and clear the bit in the + * status mask. + */ + bit = counter->hw.idx; + if (!test_bit(bit, (unsigned long *) overflown) || + test_bit(bit, (unsigned long *) status)) { + clear_bit(bit, (unsigned long *) status); + perf_save_and_restart(counter); + } + } + perf_store_irq_data(leader, counter->hw_event_type); + perf_store_irq_data(leader, atomic64_counter_read(counter)); + } +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) +{ + int bit, cpu = smp_processor_id(); + struct cpu_hw_counters *cpuc; + u64 ack, status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + if (!status) { + ack_APIC_irq(); + return; + } + + /* Disable counters globally */ + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + ack_APIC_irq(); + + cpuc = &per_cpu(cpu_hw_counters, cpu); + +again: + ack = status; + for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) { + struct perf_counter *counter = cpuc->counters[bit]; + + clear_bit(bit, (unsigned long *) &status); + if (!counter) + continue; + + perf_save_and_restart(counter); + + switch (counter->record_type) { + case PERF_RECORD_SIMPLE: + continue; + case PERF_RECORD_IRQ: + perf_store_irq_data(counter, instruction_pointer(regs)); + break; + case PERF_RECORD_GROUP: + perf_store_irq_data(counter, counter->hw_event_type); + perf_store_irq_data(counter, + atomic64_counter_read(counter)); + perf_handle_group(counter, &status, &ack); + break; + } + /* + * From NMI context we cannot call into the scheduler to + * do a task wakeup - but we mark these counters as + * wakeup_pending and initate a wakeup callback: + */ + if (nmi) { + counter->wakeup_pending = 1; + set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + } else { + wake_up(&counter->waitq); + } + } + + wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack, 0); + + /* + * Repeat if there is more work to be done: + */ + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + if (status) + goto again; + + /* + * Do not reenable when global enable is off: + */ + if (cpuc->enable_all) + __hw_perf_enable_all(); +} + +void smp_perf_counter_interrupt(struct pt_regs *regs) +{ + irq_enter(); +#ifdef CONFIG_X86_64 + add_pda(apic_perf_irqs, 1); +#else + per_cpu(irq_stat, smp_processor_id()).apic_perf_irqs++; +#endif + apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); + __smp_perf_counter_interrupt(regs, 0); + + irq_exit(); +} + +/* + * This handler is triggered by NMI contexts: + */ +void perf_counter_notify(struct pt_regs *regs) +{ + struct cpu_hw_counters *cpuc; + unsigned long flags; + int bit, cpu; + + local_irq_save(flags); + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_counters, cpu); + + for_each_bit(bit, cpuc->used, nr_hw_counters) { + struct perf_counter *counter = cpuc->counters[bit]; + + if (!counter) + continue; + + if (counter->wakeup_pending) { + counter->wakeup_pending = 0; + wake_up(&counter->waitq); + } + } + + local_irq_restore(flags); +} + +void __cpuinit perf_counters_lapic_init(int nmi) +{ + u32 apic_val; + + if (!perf_counters_initialized) + return; + /* + * Enable the performance counter vector in the APIC LVT: + */ + apic_val = apic_read(APIC_LVTERR); + + apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED); + if (nmi) + apic_write(APIC_LVTPC, APIC_DM_NMI); + else + apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); + apic_write(APIC_LVTERR, apic_val); +} + +static int __kprobes +perf_counter_nmi_handler(struct notifier_block *self, + unsigned long cmd, void *__args) +{ + struct die_args *args = __args; + struct pt_regs *regs; + + if (likely(cmd != DIE_NMI_IPI)) + return NOTIFY_DONE; + + regs = args->regs; + + apic_write(APIC_LVTPC, APIC_DM_NMI); + __smp_perf_counter_interrupt(regs, 1); + + return NOTIFY_STOP; +} + +static __read_mostly struct notifier_block perf_counter_nmi_notifier = { + .notifier_call = perf_counter_nmi_handler +}; + +void __init init_hw_perf_counters(void) +{ + union cpuid10_eax eax; + unsigned int unused; + unsigned int ebx; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return; + + /* + * Check whether the Architectural PerfMon supports + * Branch Misses Retired Event or not. + */ + cpuid(10, &(eax.full), &ebx, &unused, &unused); + if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) + return; + + printk(KERN_INFO "Intel Performance Monitoring support detected.\n"); + + printk(KERN_INFO "... version: %d\n", eax.split.version_id); + printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters); + nr_hw_counters = eax.split.num_counters; + if (nr_hw_counters > MAX_HW_COUNTERS) { + nr_hw_counters = MAX_HW_COUNTERS; + WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", + nr_hw_counters, MAX_HW_COUNTERS); + } + perf_counter_mask = (1 << nr_hw_counters) - 1; + perf_max_counters = nr_hw_counters; + + printk(KERN_INFO "... bit_width: %d\n", eax.split.bit_width); + printk(KERN_INFO "... mask_length: %d\n", eax.split.mask_length); + + perf_counters_lapic_init(0); + register_die_notifier(&perf_counter_nmi_notifier); + + perf_counters_initialized = true; +} diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3194636..fc013cf 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -984,6 +984,11 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt +#ifdef CONFIG_PERF_COUNTERS +apicinterrupt LOCAL_PERF_VECTOR \ + perf_counter_interrupt smp_perf_counter_interrupt +#endif + /* * Exception entry points. */ diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d1d4dc5..d92bc71 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -56,6 +56,10 @@ static int show_other_interrupts(struct seq_file *p) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_printf(p, " Local timer interrupts\n"); + seq_printf(p, "CNT: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); + seq_printf(p, " Performance counter interrupts\n"); #endif #ifdef CONFIG_SMP seq_printf(p, "RES: "); @@ -160,6 +164,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; + sum += irq_stats(cpu)->apic_perf_irqs; #endif #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 607db63..6a33b5e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -160,6 +160,9 @@ void __init native_init_IRQ(void) /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +# ifdef CONFIG_PERF_COUNTERS + alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); +# endif #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 8670b3c..91d785c 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -138,6 +138,11 @@ static void __init apic_intr_init(void) /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + /* Performance monitoring interrupt: */ +#ifdef CONFIG_PERF_COUNTERS + alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); +#endif } void __init native_init_IRQ(void) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index b1cc6da..dee553c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,7 +6,7 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ - +#include #include #include #include @@ -891,6 +891,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) tracehook_notify_resume(regs); } + if (thread_info_flags & _TIF_PERF_COUNTERS) { + clear_thread_flag(TIF_PERF_COUNTERS); + perf_counter_notify(regs); + } + #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d44395f..496726d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -332,3 +332,4 @@ ENTRY(sys_call_table) .long sys_dup3 /* 330 */ .long sys_pipe2 .long sys_inotify_init1 + .long sys_perf_counter_open -- cgit v0.10.2 From 87b9cf4623ad4e5fc009e48c020593dffd5d3793 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Dec 2008 14:20:16 +0100 Subject: x86, perfcounters: read out MSR_CORE_PERF_GLOBAL_STATUS with counters disabled Impact: make perfcounter NMI and IRQ sequence more robust Make __smp_perf_counter_interrupt() a bit more conservative: first disable all counters, then read out the status. Most invocations are because there are real events, so there's no performance impact. Code flow gets a bit simpler as well this way. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 82440cb..615e953 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -383,18 +383,16 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) struct cpu_hw_counters *cpuc; u64 ack, status; - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - if (!status) { - ack_APIC_irq(); - return; - } - /* Disable counters globally */ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); ack_APIC_irq(); cpuc = &per_cpu(cpu_hw_counters, cpu); + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + if (!status) + goto out; + again: ack = status; for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) { @@ -440,7 +438,7 @@ again: rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); if (status) goto again; - +out: /* * Do not reenable when global enable is off: */ -- cgit v0.10.2 From 4c59e4676dc95f6f58a2cff5390b2699fa5b5549 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Dec 2008 19:38:33 +0100 Subject: perfcounters: select ANON_INODES The perfcounters subsystem depends on CONFIG_ANON_INODES facilities, so make sure it's selected. Signed-off-by: Ingo Molnar diff --git a/init/Kconfig b/init/Kconfig index 78bede2..7d147a3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -741,6 +741,7 @@ config PERF_COUNTERS bool "Kernel Performance Counters" depends on HAVE_PERF_COUNTERS default y + select ANON_INODES help Enable kernel support for performance counter hardware. -- cgit v0.10.2 From 7e2ae34749edf19e76e594b9c4b2cdde1066afc5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 9 Dec 2008 11:40:46 +0100 Subject: perfcounters, x86: simplify disable/enable of counters Impact: fix spurious missed counter wakeups In the case of NMI events, close a race window that can occur if an NMI hits counter code that temporarily disables+enables a counter, and the NMI leaks into the disabled section. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 615e953..7d528ff 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -136,14 +136,25 @@ void hw_perf_disable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); } +static inline void +__hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) +{ + wrmsr(hwc->config_base + idx, hwc->config, 0); +} + static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]); -static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx) +static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx) { per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count; wrmsr(hwc->counter_base + idx, hwc->next_count, 0); - wrmsr(hwc->config_base + idx, hwc->config, 0); +} + +static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx) +{ + wrmsr(hwc->config_base + idx, + hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } void hw_perf_counter_enable(struct perf_counter *counter) @@ -161,11 +172,11 @@ void hw_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - wrmsr(hwc->config_base + idx, - hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0); + __hw_perf_counter_disable(hwc, idx); cpuc->counters[idx] = counter; - counter->hw.config |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + __hw_perf_counter_set_period(hwc, idx); __hw_perf_counter_enable(hwc, idx); } @@ -286,8 +297,7 @@ void hw_perf_counter_disable(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - counter->hw.config &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(hwc->config_base + idx, hwc->config, 0); + __hw_perf_counter_disable(hwc, idx); clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; @@ -328,18 +338,24 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data) } } +/* + * NMI-safe enable method: + */ static void perf_save_and_restart(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; int idx = hwc->idx; + u64 pmc_ctrl; + int err; - wrmsr(hwc->config_base + idx, - hwc->config & ~ARCH_PERFMON_EVENTSEL0_ENABLE, 0); + err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl); + WARN_ON_ONCE(err); - if (hwc->config & ARCH_PERFMON_EVENTSEL0_ENABLE) { - __hw_perf_save_counter(counter, hwc, idx); + __hw_perf_save_counter(counter, hwc, idx); + __hw_perf_counter_set_period(hwc, idx); + + if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) __hw_perf_counter_enable(hwc, idx); - } } static void -- cgit v0.10.2 From 1e12567678054bc1d4c944ecfad17624b3e49345 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 9 Dec 2008 12:18:18 +0100 Subject: perfcounters, x86: clean up debug code Impact: cleanup Get rid of unused debug code. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 7d528ff..919ec46 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -214,13 +214,11 @@ static void __hw_perf_save_counter(struct perf_counter *counter, { s64 raw = -1; s64 delta; - int err; /* * Get the raw hw counter value: */ - err = rdmsrl_safe(hwc->counter_base + idx, &raw); - WARN_ON_ONCE(err); + rdmsrl(hwc->counter_base + idx, raw); /* * Rebase it to zero (it started counting at -irq_period), @@ -252,20 +250,18 @@ static void __hw_perf_save_counter(struct perf_counter *counter, void perf_counter_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count; - int cpu, err, idx; + int cpu, idx; + + if (!nr_hw_counters) + return; local_irq_disable(); cpu = smp_processor_id(); - err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_CTRL, &ctrl); - WARN_ON_ONCE(err); - - err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_STATUS, &status); - WARN_ON_ONCE(err); - - err = rdmsrl_safe(MSR_CORE_PERF_GLOBAL_OVF_CTRL, &overflow); - WARN_ON_ONCE(err); + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); printk(KERN_INFO "\n"); printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl); @@ -273,11 +269,8 @@ void perf_counter_print_debug(void) printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); for (idx = 0; idx < nr_hw_counters; idx++) { - err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl); - WARN_ON_ONCE(err); - - err = rdmsrl_safe(MSR_ARCH_PERFMON_PERFCTR0 + idx, &pmc_count); - WARN_ON_ONCE(err); + rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); + rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count); next_count = per_cpu(prev_next_count[idx], cpu); @@ -310,13 +303,11 @@ void hw_perf_counter_read(struct perf_counter *counter) unsigned long addr = hwc->counter_base + hwc->idx; s64 offs, val = -1LL; s32 val32; - int err; /* Careful: NMI might modify the counter offset */ do { offs = hwc->prev_count; - err = rdmsrl_safe(addr, &val); - WARN_ON_ONCE(err); + rdmsrl(addr, val); } while (offs != hwc->prev_count); val32 = (s32) val; @@ -346,10 +337,8 @@ static void perf_save_and_restart(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; int idx = hwc->idx; u64 pmc_ctrl; - int err; - err = rdmsrl_safe(MSR_ARCH_PERFMON_EVENTSEL0 + idx, &pmc_ctrl); - WARN_ON_ONCE(err); + rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); __hw_perf_save_counter(counter, hwc, idx); __hw_perf_counter_set_period(hwc, idx); -- cgit v0.10.2 From 43874d238d5f208854a73c3225ca2a22833eec8b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 9 Dec 2008 12:23:59 +0100 Subject: perfcounters: consolidate global-disable codepaths Impact: cleanup Simplify global disable handling. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 919ec46..6a93d1f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -33,7 +33,6 @@ static u32 perf_counter_mask __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[MAX_HW_COUNTERS]; unsigned long used[BITS_TO_LONGS(MAX_HW_COUNTERS)]; - int enable_all; }; /* @@ -115,24 +114,13 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) return 0; } -static void __hw_perf_enable_all(void) -{ - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); -} - void hw_perf_enable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - cpuc->enable_all = 1; - __hw_perf_enable_all(); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } void hw_perf_disable_all(void) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - cpuc->enable_all = 0; wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); } @@ -385,8 +373,10 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) { int bit, cpu = smp_processor_id(); + u64 ack, status, saved_global; struct cpu_hw_counters *cpuc; - u64 ack, status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); /* Disable counters globally */ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); @@ -445,10 +435,9 @@ again: goto again; out: /* - * Do not reenable when global enable is off: + * Restore - do not reenable when global enable is off: */ - if (cpuc->enable_all) - __hw_perf_enable_all(); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, saved_global, 0); } void smp_perf_counter_interrupt(struct pt_regs *regs) -- cgit v0.10.2 From 4ac13294e44664bb7edf4daf52edb71e7c6bbe84 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 9 Dec 2008 21:43:39 +0100 Subject: perf counters: protect them against CSTATE transitions Impact: fix rare lost events problem There are CPUs whose performance counters misbehave on CSTATE transitions, so provide a way to just disable/enable them around deep idle methods. (hw_perf_enable_all() is cheap on x86.) Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6a93d1f..0a7f3be 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -119,10 +120,21 @@ void hw_perf_enable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } -void hw_perf_disable_all(void) +void hw_perf_restore_ctrl(u64 ctrl) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); +} +EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl); + +u64 hw_perf_disable_all(void) +{ + u64 ctrl; + + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + return ctrl; } +EXPORT_SYMBOL_GPL(hw_perf_disable_all); static inline void __hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 5f8d746..cca804e 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -270,8 +270,11 @@ static atomic_t c3_cpu_count; /* Common C-state entry for C2, C3, .. */ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) { + u64 pctrl; + /* Don't trace irqs off for idle */ stop_critical_timings(); + pctrl = hw_perf_disable_all(); if (cstate->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cstate); @@ -284,6 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } + hw_perf_restore_ctrl(pctrl); start_critical_timings(); } #endif /* !CONFIG_CPU_IDLE */ @@ -1425,8 +1429,11 @@ static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr, */ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) { + u64 pctrl; + /* Don't trace irqs off for idle */ stop_critical_timings(); + pctrl = hw_perf_disable_all(); if (cx->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -1441,6 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } + hw_perf_restore_ctrl(pctrl); start_critical_timings(); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 22c4469..5031b56 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -156,6 +156,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *task); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); +extern void hw_perf_restore_ctrl(u64 ctrl); +extern u64 hw_perf_disable_all(void); #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -166,6 +168,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } +static inline void hw_perf_restore_ctrl(u64 ctrl) { } +static inline u64 hw_perf_disable_all(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ -- cgit v0.10.2 From eab656ae04b9d3b83265e3db01c0d2c46b748ef7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Dec 2008 19:26:59 +0100 Subject: perf counters: clean up 'raw' type API Impact: cleanup Introduce a separate hw_event type. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5031b56..daedd7d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -38,6 +38,7 @@ enum hw_event_types { * If this bit is set in the type, then trigger NMI sampling: */ PERF_COUNT_NMI = (1 << 30), + PERF_COUNT_RAW = (1 << 31), }; /* @@ -49,6 +50,12 @@ enum perf_record_type { PERF_RECORD_GROUP, }; +struct perf_counter_event { + u32 hw_event_type; + u32 hw_event_period; + u64 hw_raw_ctrl; +}; + /** * struct hw_perf_counter - performance counter hardware details */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6cce728..3ecd73d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,6 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; +struct perf_counter_event; #include #include @@ -625,9 +626,6 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage int -sys_perf_counter_open(u32 hw_event_type, - u32 hw_event_period, - u32 record_type, - pid_t pid, - int cpu); +sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, + pid_t pid, int cpu, int masterfd); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 20508f0..96c333a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -734,26 +734,27 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type) * @pid: target pid */ asmlinkage int -sys_perf_counter_open(u32 hw_event_type, - u32 hw_event_period, - u32 record_type, - pid_t pid, - int cpu) +sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, + pid_t pid, int cpu, int masterfd) { struct perf_counter_context *ctx; + struct perf_counter_event event; struct perf_counter *counter; int ret; + if (copy_from_user(&event, uevent, sizeof(event)) != 0) + return -EFAULT; + ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(hw_event_period, cpu, record_type); + counter = perf_counter_alloc(event.hw_event_period, cpu, record_type); if (!counter) goto err_put_context; - ret = hw_perf_counter_init(counter, hw_event_type); + ret = hw_perf_counter_init(counter, event.hw_event_type); if (ret) goto err_free_put_context; -- cgit v0.10.2 From dfa7c899b401d7dc5d85aca416aee64ac82812f2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Dec 2008 19:35:37 +0100 Subject: perf counters: expand use of counter->event Impact: change syscall, cleanup Make use of the new perf_counters event type. Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 0a7f3be..30e7ebf 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -56,9 +56,10 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); /* * Setup the hardware configuration for a given hw_event_type */ -int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) +int hw_perf_counter_init(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; + u32 hw_event_type = counter->event.hw_event_type; if (unlikely(!perf_counters_initialized)) return -EINVAL; @@ -83,7 +84,7 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; - hwc->irq_period = counter->__irq_period; + hwc->irq_period = counter->event.hw_event_period; /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -95,21 +96,19 @@ int hw_perf_counter_init(struct perf_counter *counter, s32 hw_event_type) hwc->next_count = -((s32) hwc->irq_period); /* - * Negative event types mean raw encoded event+umask values: + * Raw event type provide the config in the event structure */ - if (hw_event_type < 0) { - counter->hw_event_type = -hw_event_type; - counter->hw_event_type &= ~PERF_COUNT_NMI; + hw_event_type &= ~PERF_COUNT_NMI; + if (hw_event_type == PERF_COUNT_RAW) { + hwc->config |= counter->event.hw_raw_ctrl; } else { - hw_event_type &= ~PERF_COUNT_NMI; if (hw_event_type >= max_intel_perfmon_events) return -EINVAL; /* * The generic map: */ - counter->hw_event_type = intel_perfmon_event_map[hw_event_type]; + hwc->config |= intel_perfmon_event_map[hw_event_type]; } - hwc->config |= counter->hw_event_type; counter->wakeup_pending = 0; return 0; @@ -373,7 +372,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) perf_save_and_restart(counter); } } - perf_store_irq_data(leader, counter->hw_event_type); + perf_store_irq_data(leader, counter->event.hw_event_type); perf_store_irq_data(leader, atomic64_counter_read(counter)); } } @@ -418,7 +417,8 @@ again: perf_store_irq_data(counter, instruction_pointer(regs)); break; case PERF_RECORD_GROUP: - perf_store_irq_data(counter, counter->hw_event_type); + perf_store_irq_data(counter, + counter->event.hw_event_type); perf_store_irq_data(counter, atomic64_counter_read(counter)); perf_handle_group(counter, &status, &ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index daedd7d..1f00176 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -96,8 +96,7 @@ struct perf_counter { #else atomic_t count32[2]; #endif - u64 __irq_period; - + struct perf_counter_event event; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -111,7 +110,6 @@ struct perf_counter { int oncpu; int cpu; - s32 hw_event_type; enum perf_record_type record_type; /* read() / irq related data */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 96c333a..2557c67 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -37,7 +37,7 @@ static DEFINE_MUTEX(perf_resource_mutex); * Architecture provided APIs - weak aliases: */ -int __weak hw_perf_counter_init(struct perf_counter *counter, u32 hw_event_type) +int __weak hw_perf_counter_init(struct perf_counter *counter) { return -EINVAL; } @@ -707,7 +707,7 @@ static const struct file_operations perf_fops = { * Allocate and initialize a counter structure */ static struct perf_counter * -perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type) +perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) { struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); @@ -722,7 +722,7 @@ perf_counter_alloc(u32 hw_event_period, int cpu, u32 record_type) counter->usrdata = &counter->data[1]; counter->cpu = cpu; counter->record_type = record_type; - counter->__irq_period = hw_event_period; + counter->event = *event; counter->wakeup_pending = 0; return counter; @@ -750,11 +750,11 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(event.hw_event_period, cpu, record_type); + counter = perf_counter_alloc(&event, cpu, record_type); if (!counter) goto err_put_context; - ret = hw_perf_counter_init(counter, event.hw_event_type); + ret = hw_perf_counter_init(counter); if (ret) goto err_free_put_context; -- cgit v0.10.2 From 9f66a3810fe0d4100972db84290f3ae4a4d77025 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 10 Dec 2008 12:33:23 +0100 Subject: perf counters: restructure the API Impact: clean up new API Thorough cleanup of the new perf counters API, we now get clean separation of the various concepts: - introduce perf_counter_hw_event to separate out the event source details - move special type flags into separate attributes: PERF_COUNT_NMI, PERF_COUNT_RAW - extend the type to u64 and reserve it fully to the architecture in the raw type case. And make use of all these changes in the core and x86 perfcounters code. Also change the syscall signature to: asmlinkage int sys_perf_counter_open( struct perf_counter_hw_event *hw_event_uptr __user, pid_t pid, int cpu, int group_fd); ( Note that group_fd is unused for now - it's reserved for the counter groups abstraction. ) Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 30e7ebf..ef1936a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -58,8 +58,8 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); */ int hw_perf_counter_init(struct perf_counter *counter) { + struct perf_counter_hw_event *hw_event = &counter->hw_event; struct hw_perf_counter *hwc = &counter->hw; - u32 hw_event_type = counter->event.hw_event_type; if (unlikely(!perf_counters_initialized)) return -EINVAL; @@ -77,14 +77,14 @@ int hw_perf_counter_init(struct perf_counter *counter) hwc->nmi = 0; if (capable(CAP_SYS_ADMIN)) { hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - if (hw_event_type & PERF_COUNT_NMI) + if (hw_event->nmi) hwc->nmi = 1; } - hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; - hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; + hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; + hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; - hwc->irq_period = counter->event.hw_event_period; + hwc->irq_period = hw_event->irq_period; /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -93,21 +93,20 @@ int hw_perf_counter_init(struct perf_counter *counter) if (!hwc->irq_period) hwc->irq_period = 0x7FFFFFFF; - hwc->next_count = -((s32) hwc->irq_period); + hwc->next_count = -(s32)hwc->irq_period; /* * Raw event type provide the config in the event structure */ - hw_event_type &= ~PERF_COUNT_NMI; - if (hw_event_type == PERF_COUNT_RAW) { - hwc->config |= counter->event.hw_raw_ctrl; + if (hw_event->raw) { + hwc->config |= hw_event->type; } else { - if (hw_event_type >= max_intel_perfmon_events) + if (hw_event->type >= max_intel_perfmon_events) return -EINVAL; /* * The generic map: */ - hwc->config |= intel_perfmon_event_map[hw_event_type]; + hwc->config |= intel_perfmon_event_map[hw_event->type]; } counter->wakeup_pending = 0; @@ -354,7 +353,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) int bit; list_for_each_entry(counter, &ctx->counters, list) { - if (counter->record_type != PERF_RECORD_SIMPLE || + if (counter->hw_event.record_type != PERF_RECORD_SIMPLE || counter == leader) continue; @@ -372,7 +371,7 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) perf_save_and_restart(counter); } } - perf_store_irq_data(leader, counter->event.hw_event_type); + perf_store_irq_data(leader, counter->hw_event.type); perf_store_irq_data(leader, atomic64_counter_read(counter)); } } @@ -410,7 +409,7 @@ again: perf_save_and_restart(counter); - switch (counter->record_type) { + switch (counter->hw_event.record_type) { case PERF_RECORD_SIMPLE: continue; case PERF_RECORD_IRQ: @@ -418,7 +417,7 @@ again: break; case PERF_RECORD_GROUP: perf_store_irq_data(counter, - counter->event.hw_event_type); + counter->hw_event.type); perf_store_irq_data(counter, atomic64_counter_read(counter)); perf_handle_group(counter, &status, &ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1f00176..a2b4852 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -24,65 +24,93 @@ struct task_struct; /* - * Generalized hardware event types, used by the hw_event_type parameter - * of the sys_perf_counter_open() syscall: + * User-space ABI bits: + */ + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: */ enum hw_event_types { - PERF_COUNT_CYCLES, - PERF_COUNT_INSTRUCTIONS, - PERF_COUNT_CACHE_REFERENCES, - PERF_COUNT_CACHE_MISSES, - PERF_COUNT_BRANCH_INSTRUCTIONS, - PERF_COUNT_BRANCH_MISSES, /* - * If this bit is set in the type, then trigger NMI sampling: + * Common hardware events, generalized by the kernel: */ - PERF_COUNT_NMI = (1 << 30), - PERF_COUNT_RAW = (1 << 31), + PERF_COUNT_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, }; /* * IRQ-notification data record type: */ -enum perf_record_type { - PERF_RECORD_SIMPLE, - PERF_RECORD_IRQ, - PERF_RECORD_GROUP, +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, }; -struct perf_counter_event { - u32 hw_event_type; - u32 hw_event_period; - u64 hw_raw_ctrl; +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + u64 type; + + u64 irq_period; + u32 record_type; + + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + __reserved_1 : 29; + + u64 __reserved_2; }; +/* + * Kernel-internal data types: + */ + /** - * struct hw_perf_counter - performance counter hardware details + * struct hw_perf_counter - performance counter hardware details: */ struct hw_perf_counter { - u64 config; - unsigned long config_base; - unsigned long counter_base; - int nmi; - unsigned int idx; - u64 prev_count; - s32 next_count; - u64 irq_period; + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + u64 prev_count; + u64 irq_period; + s32 next_count; }; /* * Hardcoded buffer length limit for now, for IRQ-fed events: */ -#define PERF_DATA_BUFLEN 2048 +#define PERF_DATA_BUFLEN 2048 /** * struct perf_data - performance counter IRQ data sampling ... */ struct perf_data { - int len; - int rd_idx; - int overrun; - u8 data[PERF_DATA_BUFLEN]; + int len; + int rd_idx; + int overrun; + u8 data[PERF_DATA_BUFLEN]; }; /** @@ -96,7 +124,7 @@ struct perf_counter { #else atomic_t count32[2]; #endif - struct perf_counter_event event; + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; struct perf_counter_context *ctx; @@ -110,8 +138,6 @@ struct perf_counter { int oncpu; int cpu; - enum perf_record_type record_type; - /* read() / irq related data */ wait_queue_head_t waitq; /* optional: for NMIs */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3ecd73d..a549678 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,7 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; -struct perf_counter_event; +struct perf_counter_hw_event; #include #include @@ -625,7 +625,11 @@ asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage int -sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, - pid_t pid, int cpu, int masterfd); + +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2557c67..0d323ce 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -669,7 +669,7 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_counter *counter = file->private_data; - switch (counter->record_type) { + switch (counter->hw_event.record_type) { case PERF_RECORD_SIMPLE: return perf_read_hw(counter, buf, count); @@ -707,7 +707,7 @@ static const struct file_operations perf_fops = { * Allocate and initialize a counter structure */ static struct perf_counter * -perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) +perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) { struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); @@ -718,31 +718,37 @@ perf_counter_alloc(struct perf_counter_event *event, int cpu, u32 record_type) INIT_LIST_HEAD(&counter->list); init_waitqueue_head(&counter->waitq); - counter->irqdata = &counter->data[0]; - counter->usrdata = &counter->data[1]; - counter->cpu = cpu; - counter->record_type = record_type; - counter->event = *event; - counter->wakeup_pending = 0; + counter->irqdata = &counter->data[0]; + counter->usrdata = &counter->data[1]; + counter->cpu = cpu; + counter->hw_event = *hw_event; + counter->wakeup_pending = 0; return counter; } /** - * sys_perf_task_open - open a performance counter associate it to a task - * @hw_event_type: event type for monitoring/sampling... + * sys_perf_task_open - open a performance counter, associate it to a task/cpu + * + * @hw_event_uptr: event type attributes for monitoring/sampling * @pid: target pid + * @cpu: target cpu + * @group_fd: group leader counter fd */ -asmlinkage int -sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, - pid_t pid, int cpu, int masterfd) +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd) + { struct perf_counter_context *ctx; - struct perf_counter_event event; + struct perf_counter_hw_event hw_event; struct perf_counter *counter; int ret; - if (copy_from_user(&event, uevent, sizeof(event)) != 0) + if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) return -EFAULT; ctx = find_get_context(pid, cpu); @@ -750,7 +756,7 @@ sys_perf_counter_open(struct perf_counter_event __user *uevent, u32 record_type, return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(&event, cpu, record_type); + counter = perf_counter_alloc(&hw_event, cpu); if (!counter) goto err_put_context; -- cgit v0.10.2 From 04289bb9891882202d7e961c4c04d2376930e9f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 08:38:42 +0100 Subject: perf counters: add support for group counters Impact: add group counters This patch adds the "counter groups" abstraction. Groups of counters behave much like normal 'single' counters, with a few semantic and behavioral extensions on top of that. A counter group is created by creating a new counter with the open() syscall's group-leader group_fd file descriptor parameter pointing to another, already existing counter. Groups of counters are scheduled in and out in one atomic group, and they are also roundrobin-scheduled atomically. Counters that are member of a group can also record events with an (atomic) extended timestamp that extends to all members of the group, if the record type is set to PERF_RECORD_GROUP. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index ef1936a..54b4ad0 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -346,18 +346,22 @@ static void perf_save_and_restart(struct perf_counter *counter) } static void -perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) +perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) { - struct perf_counter_context *ctx = leader->ctx; - struct perf_counter *counter; + struct perf_counter *counter, *group_leader = sibling->group_leader; int bit; - list_for_each_entry(counter, &ctx->counters, list) { - if (counter->hw_event.record_type != PERF_RECORD_SIMPLE || - counter == leader) - continue; + /* + * Store the counter's own timestamp first: + */ + perf_store_irq_data(sibling, sibling->hw_event.type); + perf_store_irq_data(sibling, atomic64_counter_read(sibling)); - if (counter->active) { + /* + * Then store sibling timestamps (if any): + */ + list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { + if (!counter->active) { /* * When counter was not in the overflow mask, we have to * read it from hardware. We read it as well, when it @@ -371,8 +375,8 @@ perf_handle_group(struct perf_counter *leader, u64 *status, u64 *overflown) perf_save_and_restart(counter); } } - perf_store_irq_data(leader, counter->hw_event.type); - perf_store_irq_data(leader, atomic64_counter_read(counter)); + perf_store_irq_data(sibling, counter->hw_event.type); + perf_store_irq_data(sibling, atomic64_counter_read(counter)); } } @@ -416,10 +420,6 @@ again: perf_store_irq_data(counter, instruction_pointer(regs)); break; case PERF_RECORD_GROUP: - perf_store_irq_data(counter, - counter->hw_event.type); - perf_store_irq_data(counter, - atomic64_counter_read(counter)); perf_handle_group(counter, &status, &ack); break; } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a2b4852..7af7d89 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -117,7 +117,10 @@ struct perf_data { * struct perf_counter - performance counter kernel representation: */ struct perf_counter { - struct list_head list; + struct list_head list_entry; + struct list_head sibling_list; + struct perf_counter *group_leader; + int active; #if BITS_PER_LONG == 64 atomic64_t count; @@ -158,7 +161,8 @@ struct perf_counter_context { * Protect the list of counters: */ spinlock_t lock; - struct list_head counters; + + struct list_head counter_list; int nr_counters; int nr_active; struct task_struct *task; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0d323ce..fa59fe8 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -55,7 +56,7 @@ void __weak hw_perf_counter_setup(void) { } * Read the cached counter in counter safe against cross CPU / NMI * modifications. 64 bit version - no complications. */ -static inline u64 perf_read_counter_safe(struct perf_counter *counter) +static inline u64 perf_counter_read_safe(struct perf_counter *counter) { return (u64) atomic64_read(&counter->count); } @@ -66,7 +67,7 @@ static inline u64 perf_read_counter_safe(struct perf_counter *counter) * Read the cached counter in counter safe against cross CPU / NMI * modifications. 32 bit version. */ -static u64 perf_read_counter_safe(struct perf_counter *counter) +static u64 perf_counter_read_safe(struct perf_counter *counter) { u32 cntl, cnth; @@ -83,13 +84,55 @@ static u64 perf_read_counter_safe(struct perf_counter *counter) #endif +static void +list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) +{ + struct perf_counter *group_leader = counter->group_leader; + + /* + * Depending on whether it is a standalone or sibling counter, + * add it straight to the context's counter list, or to the group + * leader's sibling list: + */ + if (counter->group_leader == counter) + list_add_tail(&counter->list_entry, &ctx->counter_list); + else + list_add_tail(&counter->list_entry, &group_leader->sibling_list); +} + +static void +list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) +{ + struct perf_counter *sibling, *tmp; + + list_del_init(&counter->list_entry); + + if (list_empty(&counter->sibling_list)) + return; + + /* + * If this was a group counter with sibling counters then + * upgrade the siblings to singleton counters by adding them + * to the context list directly: + */ + list_for_each_entry_safe(sibling, tmp, + &counter->sibling_list, list_entry) { + + list_del_init(&sibling->list_entry); + list_add_tail(&sibling->list_entry, &ctx->counter_list); + WARN_ON_ONCE(!sibling->group_leader); + WARN_ON_ONCE(sibling->group_leader == sibling); + sibling->group_leader = sibling; + } +} + /* * Cross CPU call to remove a performance counter * * We disable the counter on the hardware level first. After that we * remove it from the context list. */ -static void __perf_remove_from_context(void *info) +static void __perf_counter_remove_from_context(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; @@ -119,7 +162,7 @@ static void __perf_remove_from_context(void *info) * counters on a global level. NOP for non NMI based counters. */ hw_perf_disable_all(); - list_del_init(&counter->list); + list_del_counter(counter, ctx); hw_perf_enable_all(); if (!ctx->task) { @@ -144,7 +187,7 @@ static void __perf_remove_from_context(void *info) * CPU counters are removed with a smp call. For task counters we only * call when the task is on a CPU. */ -static void perf_remove_from_context(struct perf_counter *counter) +static void perf_counter_remove_from_context(struct perf_counter *counter) { struct perf_counter_context *ctx = counter->ctx; struct task_struct *task = ctx->task; @@ -155,32 +198,32 @@ static void perf_remove_from_context(struct perf_counter *counter) * the removal is always sucessful. */ smp_call_function_single(counter->cpu, - __perf_remove_from_context, + __perf_counter_remove_from_context, counter, 1); return; } retry: - task_oncpu_function_call(task, __perf_remove_from_context, + task_oncpu_function_call(task, __perf_counter_remove_from_context, counter); spin_lock_irq(&ctx->lock); /* * If the context is active we need to retry the smp call. */ - if (ctx->nr_active && !list_empty(&counter->list)) { + if (ctx->nr_active && !list_empty(&counter->list_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } /* * The lock prevents that this context is scheduled in so we - * can remove the counter safely, if it the call above did not + * can remove the counter safely, if the call above did not * succeed. */ - if (!list_empty(&counter->list)) { + if (!list_empty(&counter->list_entry)) { ctx->nr_counters--; - list_del_init(&counter->list); + list_del_counter(counter, ctx); counter->task = NULL; } spin_unlock_irq(&ctx->lock); @@ -211,7 +254,7 @@ static void __perf_install_in_context(void *info) * counters on a global level. NOP for non NMI based counters. */ hw_perf_disable_all(); - list_add_tail(&counter->list, &ctx->counters); + list_add_counter(counter, ctx); hw_perf_enable_all(); ctx->nr_counters++; @@ -268,7 +311,7 @@ retry: * If the context is active and the counter has not been added * we need to retry the smp call. */ - if (ctx->nr_active && list_empty(&counter->list)) { + if (ctx->nr_active && list_empty(&counter->list_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } @@ -278,13 +321,45 @@ retry: * can add the counter safely, if it the call above did not * succeed. */ - if (list_empty(&counter->list)) { - list_add_tail(&counter->list, &ctx->counters); + if (list_empty(&counter->list_entry)) { + list_add_counter(counter, ctx); ctx->nr_counters++; } spin_unlock_irq(&ctx->lock); } +static void +counter_sched_out(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + if (!counter->active) + return; + + hw_perf_counter_disable(counter); + counter->active = 0; + counter->oncpu = -1; + + cpuctx->active_oncpu--; + ctx->nr_active--; +} + +static void +group_sched_out(struct perf_counter *group_counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + struct perf_counter *counter; + + counter_sched_out(group_counter, cpuctx, ctx); + + /* + * Schedule out siblings (if any): + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) + counter_sched_out(counter, cpuctx, ctx); +} + /* * Called from scheduler to remove the counters of the current task, * with interrupts disabled. @@ -306,21 +381,48 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) return; spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counters, list) { - if (!ctx->nr_active) - break; - if (counter->active) { - hw_perf_counter_disable(counter); - counter->active = 0; - counter->oncpu = -1; - ctx->nr_active--; - cpuctx->active_oncpu--; - } + if (ctx->nr_active) { + list_for_each_entry(counter, &ctx->counter_list, list_entry) + group_sched_out(counter, cpuctx, ctx); } spin_unlock(&ctx->lock); cpuctx->task_ctx = NULL; } +static void +counter_sched_in(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, + int cpu) +{ + if (!counter->active) + return; + + hw_perf_counter_enable(counter); + counter->active = 1; + counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ + + cpuctx->active_oncpu++; + ctx->nr_active++; +} + +static void +group_sched_in(struct perf_counter *group_counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, + int cpu) +{ + struct perf_counter *counter; + + counter_sched_in(group_counter, cpuctx, ctx, cpu); + + /* + * Schedule in siblings as one group (if any): + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) + counter_sched_in(counter, cpuctx, ctx, cpu); +} + /* * Called from scheduler to add the counters of the current task * with interrupts disabled. @@ -342,19 +444,21 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) return; spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counters, list) { + list_for_each_entry(counter, &ctx->counter_list, list_entry) { if (ctx->nr_active == cpuctx->max_pertask) break; + + /* + * Listen to the 'cpu' scheduling filter constraint + * of counters: + */ if (counter->cpu != -1 && counter->cpu != cpu) continue; - hw_perf_counter_enable(counter); - counter->active = 1; - counter->oncpu = cpu; - ctx->nr_active++; - cpuctx->active_oncpu++; + group_sched_in(counter, cpuctx, ctx, cpu); } spin_unlock(&ctx->lock); + cpuctx->task_ctx = ctx; } @@ -371,12 +475,12 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) spin_lock(&ctx->lock); /* - * Rotate the first entry last: + * Rotate the first entry last (works just fine for group counters too): */ hw_perf_disable_all(); - list_for_each_entry(counter, &ctx->counters, list) { - list_del(&counter->list); - list_add_tail(&counter->list, &ctx->counters); + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + list_del(&counter->list_entry); + list_add_tail(&counter->list_entry, &ctx->counter_list); break; } hw_perf_enable_all(); @@ -387,16 +491,23 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) } /* + * Initialize the perf_counter context in a task_struct: + */ +static void +__perf_counter_init_context(struct perf_counter_context *ctx, + struct task_struct *task) +{ + spin_lock_init(&ctx->lock); + INIT_LIST_HEAD(&ctx->counter_list); + ctx->nr_counters = 0; + ctx->task = task; +} +/* * Initialize the perf_counter context in task_struct */ void perf_counter_init_task(struct task_struct *task) { - struct perf_counter_context *ctx = &task->perf_counter_ctx; - - spin_lock_init(&ctx->lock); - INIT_LIST_HEAD(&ctx->counters); - ctx->nr_counters = 0; - ctx->task = task; + __perf_counter_init_context(&task->perf_counter_ctx, task); } /* @@ -407,7 +518,7 @@ static void __hw_perf_counter_read(void *info) hw_perf_counter_read(info); } -static u64 perf_read_counter(struct perf_counter *counter) +static u64 perf_counter_read(struct perf_counter *counter) { /* * If counter is enabled and currently active on a CPU, update the @@ -418,7 +529,7 @@ static u64 perf_read_counter(struct perf_counter *counter) __hw_perf_counter_read, counter, 1); } - return perf_read_counter_safe(counter); + return perf_counter_read_safe(counter); } /* @@ -555,7 +666,7 @@ static int perf_release(struct inode *inode, struct file *file) mutex_lock(&counter->mutex); - perf_remove_from_context(counter); + perf_counter_remove_from_context(counter); put_context(ctx); mutex_unlock(&counter->mutex); @@ -577,7 +688,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) return -EINVAL; mutex_lock(&counter->mutex); - cntval = perf_read_counter(counter); + cntval = perf_counter_read(counter); mutex_unlock(&counter->mutex); return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); @@ -707,15 +818,25 @@ static const struct file_operations perf_fops = { * Allocate and initialize a counter structure */ static struct perf_counter * -perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) +perf_counter_alloc(struct perf_counter_hw_event *hw_event, + int cpu, + struct perf_counter *group_leader) { struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return NULL; + /* + * Single counters are their own group leaders, with an + * empty sibling list: + */ + if (!group_leader) + group_leader = counter; + mutex_init(&counter->mutex); - INIT_LIST_HEAD(&counter->list); + INIT_LIST_HEAD(&counter->list_entry); + INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); counter->irqdata = &counter->data[0]; @@ -723,6 +844,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu) counter->cpu = cpu; counter->hw_event = *hw_event; counter->wakeup_pending = 0; + counter->group_leader = group_leader; return counter; } @@ -743,20 +865,45 @@ asmlinkage int sys_perf_counter_open( int group_fd) { - struct perf_counter_context *ctx; + struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; - struct perf_counter *counter; + struct perf_counter_context *ctx; + struct file *group_file = NULL; + int fput_needed = 0; int ret; if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) return -EFAULT; + /* + * Look up the group leader: + */ + group_leader = NULL; + if (group_fd != -1) { + ret = -EINVAL; + group_file = fget_light(group_fd, &fput_needed); + if (!group_file) + goto out_fput; + if (group_file->f_op != &perf_fops) + goto out_fput; + + group_leader = group_file->private_data; + /* + * Do not allow a recursive hierarchy: + */ + if (group_leader->group_leader) + goto out_fput; + } + + /* + * Get the target context (task or percpu): + */ ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) return PTR_ERR(ctx); ret = -ENOMEM; - counter = perf_counter_alloc(&hw_event, cpu); + counter = perf_counter_alloc(&hw_event, cpu, group_leader); if (!counter) goto err_put_context; @@ -770,11 +917,14 @@ asmlinkage int sys_perf_counter_open( if (ret < 0) goto err_remove_free_put_context; +out_fput: + fput_light(group_file, fput_needed); + return ret; err_remove_free_put_context: mutex_lock(&counter->mutex); - perf_remove_from_context(counter); + perf_counter_remove_from_context(counter); mutex_unlock(&counter->mutex); err_free_put_context: @@ -783,40 +933,40 @@ err_free_put_context: err_put_context: put_context(ctx); - return ret; + goto out_fput; } -static void __cpuinit perf_init_cpu(int cpu) +static void __cpuinit perf_counter_init_cpu(int cpu) { - struct perf_cpu_context *ctx; + struct perf_cpu_context *cpuctx; - ctx = &per_cpu(perf_cpu_context, cpu); - spin_lock_init(&ctx->ctx.lock); - INIT_LIST_HEAD(&ctx->ctx.counters); + cpuctx = &per_cpu(perf_cpu_context, cpu); + __perf_counter_init_context(&cpuctx->ctx, NULL); mutex_lock(&perf_resource_mutex); - ctx->max_pertask = perf_max_counters - perf_reserved_percpu; + cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; mutex_unlock(&perf_resource_mutex); + hw_perf_counter_setup(); } #ifdef CONFIG_HOTPLUG_CPU -static void __perf_exit_cpu(void *info) +static void __perf_counter_exit_cpu(void *info) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter_context *ctx = &cpuctx->ctx; struct perf_counter *counter, *tmp; - list_for_each_entry_safe(counter, tmp, &ctx->counters, list) - __perf_remove_from_context(counter); + list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) + __perf_counter_remove_from_context(counter); } -static void perf_exit_cpu(int cpu) +static void perf_counter_exit_cpu(int cpu) { - smp_call_function_single(cpu, __perf_exit_cpu, NULL, 1); + smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); } #else -static inline void perf_exit_cpu(int cpu) { } +static inline void perf_counter_exit_cpu(int cpu) { } #endif static int __cpuinit @@ -828,12 +978,12 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - perf_init_cpu(cpu); + perf_counter_init_cpu(cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: - perf_exit_cpu(cpu); + perf_counter_exit_cpu(cpu); break; default: -- cgit v0.10.2 From ccff286d85098ba5438e22aa2ea807fc1e18cf2f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 11:26:29 +0100 Subject: perf counters: group counter, fixes Impact: bugfix Check that a group does not span outside the context of a CPU or a task. Also, do not allow deep recursive hierarchies. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index fa59fe8..278209c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -107,9 +107,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_del_init(&counter->list_entry); - if (list_empty(&counter->sibling_list)) - return; - /* * If this was a group counter with sibling counters then * upgrade the siblings to singleton counters by adding them @@ -395,9 +392,6 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { - if (!counter->active) - return; - hw_perf_counter_enable(counter); counter->active = 1; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ @@ -876,32 +870,39 @@ asmlinkage int sys_perf_counter_open( return -EFAULT; /* - * Look up the group leader: + * Get the target context (task or percpu): + */ + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + /* + * Look up the group leader (we will attach this counter to it): */ group_leader = NULL; if (group_fd != -1) { ret = -EINVAL; group_file = fget_light(group_fd, &fput_needed); if (!group_file) - goto out_fput; + goto err_put_context; if (group_file->f_op != &perf_fops) - goto out_fput; + goto err_put_context; group_leader = group_file->private_data; /* - * Do not allow a recursive hierarchy: + * Do not allow a recursive hierarchy (this new sibling + * becoming part of another group-sibling): + */ + if (group_leader->group_leader != group_leader) + goto err_put_context; + /* + * Do not allow to attach to a group in a different + * task or CPU context: */ - if (group_leader->group_leader) - goto out_fput; + if (group_leader->ctx != ctx) + goto err_put_context; } - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - ret = -ENOMEM; counter = perf_counter_alloc(&hw_event, cpu, group_leader); if (!counter) -- cgit v0.10.2 From 621a01eac89b5e2f81a4cf576568b31f40a02724 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 12:46:46 +0100 Subject: perf counters: hw driver API Impact: restructure code, introduce hw_ops driver abstraction Introduce this abstraction to handle counter details: struct hw_perf_counter_ops { void (*hw_perf_counter_enable) (struct perf_counter *counter); void (*hw_perf_counter_disable) (struct perf_counter *counter); void (*hw_perf_counter_read) (struct perf_counter *counter); }; This will be useful to support assymetric hw details, and it will also be useful to implement "software counters". (Counters that count kernel managed sw events such as pagefaults, context-switches, wall-clock time or task-local time.) Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 54b4ad0..718b635 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -56,7 +56,7 @@ const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); /* * Setup the hardware configuration for a given hw_event_type */ -int hw_perf_counter_init(struct perf_counter *counter) +static int __hw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_hw_event *hw_event = &counter->hw_event; struct hw_perf_counter *hwc = &counter->hw; @@ -135,7 +135,7 @@ u64 hw_perf_disable_all(void) EXPORT_SYMBOL_GPL(hw_perf_disable_all); static inline void -__hw_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) +__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) { wrmsr(hwc->config_base + idx, hwc->config, 0); } @@ -149,13 +149,13 @@ static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx) wrmsr(hwc->counter_base + idx, hwc->next_count, 0); } -static void __hw_perf_counter_enable(struct hw_perf_counter *hwc, int idx) +static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx) { wrmsr(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } -void hw_perf_counter_enable(struct perf_counter *counter) +static void x86_perf_counter_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -170,12 +170,12 @@ void hw_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - __hw_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(hwc, idx); cpuc->counters[idx] = counter; __hw_perf_counter_set_period(hwc, idx); - __hw_perf_counter_enable(hwc, idx); + __x86_perf_counter_enable(hwc, idx); } #ifdef CONFIG_X86_64 @@ -282,20 +282,20 @@ void perf_counter_print_debug(void) local_irq_enable(); } -void hw_perf_counter_disable(struct perf_counter *counter) +static void x86_perf_counter_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __hw_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(hwc, idx); clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; __hw_perf_save_counter(counter, hwc, idx); } -void hw_perf_counter_read(struct perf_counter *counter) +static void x86_perf_counter_read(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; unsigned long addr = hwc->counter_base + hwc->idx; @@ -342,7 +342,7 @@ static void perf_save_and_restart(struct perf_counter *counter) __hw_perf_counter_set_period(hwc, idx); if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) - __hw_perf_counter_enable(hwc, idx); + __x86_perf_counter_enable(hwc, idx); } static void @@ -572,3 +572,20 @@ void __init init_hw_perf_counters(void) perf_counters_initialized = true; } + +static struct hw_perf_counter_ops x86_perf_counter_ops = { + .hw_perf_counter_enable = x86_perf_counter_enable, + .hw_perf_counter_disable = x86_perf_counter_disable, + .hw_perf_counter_read = x86_perf_counter_read, +}; + +struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter) +{ + int err; + + err = __hw_perf_counter_init(counter); + if (err) + return NULL; + + return &x86_perf_counter_ops; +} diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7af7d89..2738564 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -113,6 +113,17 @@ struct perf_data { u8 data[PERF_DATA_BUFLEN]; }; +struct perf_counter; + +/** + * struct hw_perf_counter_ops - performance counter hw ops + */ +struct hw_perf_counter_ops { + void (*hw_perf_counter_enable) (struct perf_counter *counter); + void (*hw_perf_counter_disable) (struct perf_counter *counter); + void (*hw_perf_counter_read) (struct perf_counter *counter); +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -120,6 +131,7 @@ struct perf_counter { struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; + struct hw_perf_counter_ops *hw_ops; int active; #if BITS_PER_LONG == 64 @@ -185,6 +197,9 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS +extern struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter); + extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 278209c..e6e41ca 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -37,18 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex); /* * Architecture provided APIs - weak aliases: */ - -int __weak hw_perf_counter_init(struct perf_counter *counter) +extern __weak struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter) { - return -EINVAL; + return ERR_PTR(-EINVAL); } -void __weak hw_perf_counter_enable(struct perf_counter *counter) { } -void __weak hw_perf_counter_disable(struct perf_counter *counter) { } -void __weak hw_perf_counter_read(struct perf_counter *counter) { } -void __weak hw_perf_disable_all(void) { } -void __weak hw_perf_enable_all(void) { } -void __weak hw_perf_counter_setup(void) { } +void __weak hw_perf_disable_all(void) { } +void __weak hw_perf_enable_all(void) { } +void __weak hw_perf_counter_setup(void) { } #if BITS_PER_LONG == 64 @@ -146,7 +143,7 @@ static void __perf_counter_remove_from_context(void *info) spin_lock(&ctx->lock); if (counter->active) { - hw_perf_counter_disable(counter); + counter->hw_ops->hw_perf_counter_disable(counter); counter->active = 0; ctx->nr_active--; cpuctx->active_oncpu--; @@ -257,7 +254,7 @@ static void __perf_install_in_context(void *info) ctx->nr_counters++; if (cpuctx->active_oncpu < perf_max_counters) { - hw_perf_counter_enable(counter); + counter->hw_ops->hw_perf_counter_enable(counter); counter->active = 1; counter->oncpu = cpu; ctx->nr_active++; @@ -333,7 +330,7 @@ counter_sched_out(struct perf_counter *counter, if (!counter->active) return; - hw_perf_counter_disable(counter); + counter->hw_ops->hw_perf_counter_disable(counter); counter->active = 0; counter->oncpu = -1; @@ -392,7 +389,7 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { - hw_perf_counter_enable(counter); + counter->hw_ops->hw_perf_counter_enable(counter); counter->active = 1; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ @@ -509,7 +506,9 @@ void perf_counter_init_task(struct task_struct *task) */ static void __hw_perf_counter_read(void *info) { - hw_perf_counter_read(info); + struct perf_counter *counter = info; + + counter->hw_ops->hw_perf_counter_read(counter); } static u64 perf_counter_read(struct perf_counter *counter) @@ -816,8 +815,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, struct perf_counter *group_leader) { - struct perf_counter *counter = kzalloc(sizeof(*counter), GFP_KERNEL); + struct hw_perf_counter_ops *hw_ops; + struct perf_counter *counter; + counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return NULL; @@ -839,6 +840,14 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->hw_event = *hw_event; counter->wakeup_pending = 0; counter->group_leader = group_leader; + counter->hw_ops = NULL; + + hw_ops = hw_perf_counter_init(counter); + if (!hw_ops) { + kfree(counter); + return NULL; + } + counter->hw_ops = hw_ops; return counter; } @@ -908,10 +917,6 @@ asmlinkage int sys_perf_counter_open( if (!counter) goto err_put_context; - ret = hw_perf_counter_init(counter); - if (ret) - goto err_free_put_context; - perf_install_in_context(ctx, counter, cpu); ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); @@ -927,8 +932,6 @@ err_remove_free_put_context: mutex_lock(&counter->mutex); perf_counter_remove_from_context(counter); mutex_unlock(&counter->mutex); - -err_free_put_context: kfree(counter); err_put_context: -- cgit v0.10.2 From 5c92d12411dfe5f0f3d1b1c1e2f756245e6f7249 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 13:21:10 +0100 Subject: perf counters: implement PERF_COUNT_CPU_CLOCK Impact: add new perf-counter type The 'CPU clock' counter counts the amount of CPU clock time that is elapsing, in nanoseconds. (regardless of how much of it the task is spending on a CPU executing) This counter type is a Linux kernel based abstraction, it is available even if the hardware does not support native hardware performance counters. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 718b635..43c8e9a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -178,35 +178,6 @@ static void x86_perf_counter_enable(struct perf_counter *counter) __x86_perf_counter_enable(hwc, idx); } -#ifdef CONFIG_X86_64 -static inline void atomic64_counter_set(struct perf_counter *counter, u64 val) -{ - atomic64_set(&counter->count, val); -} - -static inline u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic64_read(&counter->count); -} -#else -/* - * Todo: add proper atomic64_t support to 32-bit x86: - */ -static inline void atomic64_counter_set(struct perf_counter *counter, u64 val64) -{ - u32 *val32 = (void *)&val64; - - atomic_set(counter->count32 + 0, *(val32 + 0)); - atomic_set(counter->count32 + 1, *(val32 + 1)); -} - -static inline u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic_read(counter->count32 + 0) | - (u64) atomic_read(counter->count32 + 1) << 32; -} -#endif - static void __hw_perf_save_counter(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { @@ -309,7 +280,7 @@ static void x86_perf_counter_read(struct perf_counter *counter) } while (offs != hwc->prev_count); val32 = (s32) val; - val = (s64)hwc->irq_period + (s64)val32; + val = (s64)hwc->irq_period + (s64)val32; atomic64_counter_set(counter, hwc->prev_count + val); } @@ -573,13 +544,14 @@ void __init init_hw_perf_counters(void) perf_counters_initialized = true; } -static struct hw_perf_counter_ops x86_perf_counter_ops = { +static const struct hw_perf_counter_ops x86_perf_counter_ops = { .hw_perf_counter_enable = x86_perf_counter_enable, .hw_perf_counter_disable = x86_perf_counter_disable, .hw_perf_counter_read = x86_perf_counter_read, }; -struct hw_perf_counter_ops *hw_perf_counter_init(struct perf_counter *counter) +const struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter) { int err; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2738564..9a1713a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -131,7 +131,7 @@ struct perf_counter { struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; - struct hw_perf_counter_ops *hw_ops; + const struct hw_perf_counter_ops *hw_ops; int active; #if BITS_PER_LONG == 64 @@ -197,7 +197,7 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS -extern struct hw_perf_counter_ops * +extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); @@ -208,6 +208,9 @@ extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern void hw_perf_restore_ctrl(u64 ctrl); extern u64 hw_perf_disable_all(void); +extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); +extern u64 atomic64_counter_read(struct perf_counter *counter); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -219,7 +222,7 @@ static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore_ctrl(u64 ctrl) { } -static inline u64 hw_perf_disable_all(void) { return 0; } +static inline u64 hw_perf_disable_all(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e6e41ca..506286e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -37,15 +37,15 @@ static DEFINE_MUTEX(perf_resource_mutex); /* * Architecture provided APIs - weak aliases: */ -extern __weak struct hw_perf_counter_ops * +extern __weak const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter) { return ERR_PTR(-EINVAL); } -void __weak hw_perf_disable_all(void) { } -void __weak hw_perf_enable_all(void) { } -void __weak hw_perf_counter_setup(void) { } +u64 __weak hw_perf_disable_all(void) { return 0; } +void __weak hw_perf_restore_ctrl(u64 ctrl) { } +void __weak hw_perf_counter_setup(void) { } #if BITS_PER_LONG == 64 @@ -58,6 +58,16 @@ static inline u64 perf_counter_read_safe(struct perf_counter *counter) return (u64) atomic64_read(&counter->count); } +void atomic64_counter_set(struct perf_counter *counter, u64 val) +{ + atomic64_set(&counter->count, val); +} + +u64 atomic64_counter_read(struct perf_counter *counter) +{ + return atomic64_read(&counter->count); +} + #else /* @@ -79,6 +89,20 @@ static u64 perf_counter_read_safe(struct perf_counter *counter) return cntl | ((u64) cnth) << 32; } +void atomic64_counter_set(struct perf_counter *counter, u64 val64) +{ + u32 *val32 = (void *)&val64; + + atomic_set(counter->count32 + 0, *(val32 + 0)); + atomic_set(counter->count32 + 1, *(val32 + 1)); +} + +u64 atomic64_counter_read(struct perf_counter *counter) +{ + return atomic_read(counter->count32 + 0) | + (u64) atomic_read(counter->count32 + 1) << 32; +} + #endif static void @@ -131,6 +155,7 @@ static void __perf_counter_remove_from_context(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; + u64 perf_flags; /* * If this is a task context, we need to check whether it is @@ -155,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - hw_perf_disable_all(); + perf_flags = hw_perf_disable_all(); list_del_counter(counter, ctx); - hw_perf_enable_all(); + hw_perf_restore_ctrl(perf_flags); if (!ctx->task) { /* @@ -232,6 +257,7 @@ static void __perf_install_in_context(void *info) struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; int cpu = smp_processor_id(); + u64 perf_flags; /* * If this is a task context, we need to check whether it is @@ -247,9 +273,9 @@ static void __perf_install_in_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - hw_perf_disable_all(); + perf_flags = hw_perf_disable_all(); list_add_counter(counter, ctx); - hw_perf_enable_all(); + hw_perf_restore_ctrl(perf_flags); ctx->nr_counters++; @@ -457,6 +483,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) { struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; + u64 perf_flags; if (likely(!ctx->nr_counters)) return; @@ -468,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Rotate the first entry last (works just fine for group counters too): */ - hw_perf_disable_all(); + perf_flags = hw_perf_disable_all(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { list_del(&counter->list_entry); list_add_tail(&counter->list_entry, &ctx->counter_list); break; } - hw_perf_enable_all(); + hw_perf_restore_ctrl(perf_flags); spin_unlock(&ctx->lock); @@ -807,6 +834,42 @@ static const struct file_operations perf_fops = { .poll = perf_poll, }; +static void cpu_clock_perf_counter_enable(struct perf_counter *counter) +{ +} + +static void cpu_clock_perf_counter_disable(struct perf_counter *counter) +{ +} + +static void cpu_clock_perf_counter_read(struct perf_counter *counter) +{ + int cpu = raw_smp_processor_id(); + + atomic64_counter_set(counter, cpu_clock(cpu)); +} + +static const struct hw_perf_counter_ops perf_ops_cpu_clock = { + .hw_perf_counter_enable = cpu_clock_perf_counter_enable, + .hw_perf_counter_disable = cpu_clock_perf_counter_disable, + .hw_perf_counter_read = cpu_clock_perf_counter_read, +}; + +static const struct hw_perf_counter_ops * +sw_perf_counter_init(struct perf_counter *counter) +{ + const struct hw_perf_counter_ops *hw_ops = NULL; + + switch (counter->hw_event.type) { + case PERF_COUNT_CPU_CLOCK: + hw_ops = &perf_ops_cpu_clock; + break; + default: + break; + } + return hw_ops; +} + /* * Allocate and initialize a counter structure */ @@ -815,7 +878,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, struct perf_counter *group_leader) { - struct hw_perf_counter_ops *hw_ops; + const struct hw_perf_counter_ops *hw_ops; struct perf_counter *counter; counter = kzalloc(sizeof(*counter), GFP_KERNEL); @@ -842,7 +905,13 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->group_leader = group_leader; counter->hw_ops = NULL; - hw_ops = hw_perf_counter_init(counter); + hw_ops = NULL; + if (!hw_event->raw && hw_event->type < 0) + hw_ops = sw_perf_counter_init(counter); + if (!hw_ops) { + hw_ops = hw_perf_counter_init(counter); + } + if (!hw_ops) { kfree(counter); return NULL; @@ -912,7 +981,7 @@ asmlinkage int sys_perf_counter_open( goto err_put_context; } - ret = -ENOMEM; + ret = -EINVAL; counter = perf_counter_alloc(&hw_event, cpu, group_leader); if (!counter) goto err_put_context; -- cgit v0.10.2 From 01b2838c4298c5e0d30b4993c195ac34dd9df61e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 13:45:51 +0100 Subject: perf counters: consolidate hw_perf save/restore APIs Impact: cleanup Rename them to better match up the usual IRQ disable/enable APIs: hw_perf_disable_all() => hw_perf_save_disable() hw_perf_restore_ctrl() => hw_perf_restore() Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 43c8e9a..3e1dbeb 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -118,13 +118,13 @@ void hw_perf_enable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } -void hw_perf_restore_ctrl(u64 ctrl) +void hw_perf_restore(u64 ctrl) { wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); } -EXPORT_SYMBOL_GPL(hw_perf_restore_ctrl); +EXPORT_SYMBOL_GPL(hw_perf_restore); -u64 hw_perf_disable_all(void) +u64 hw_perf_save_disable(void) { u64 ctrl; @@ -132,7 +132,7 @@ u64 hw_perf_disable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); return ctrl; } -EXPORT_SYMBOL_GPL(hw_perf_disable_all); +EXPORT_SYMBOL_GPL(hw_perf_save_disable); static inline void __x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index cca804e..a3e66a3 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -270,11 +270,11 @@ static atomic_t c3_cpu_count; /* Common C-state entry for C2, C3, .. */ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) { - u64 pctrl; + u64 perf_flags; /* Don't trace irqs off for idle */ stop_critical_timings(); - pctrl = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); if (cstate->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cstate); @@ -287,7 +287,7 @@ static void acpi_cstate_enter(struct acpi_processor_cx *cstate) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } - hw_perf_restore_ctrl(pctrl); + hw_perf_restore(perf_flags); start_critical_timings(); } #endif /* !CONFIG_CPU_IDLE */ @@ -1433,7 +1433,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) /* Don't trace irqs off for idle */ stop_critical_timings(); - pctrl = hw_perf_disable_all(); + pctrl = hw_perf_save_disable(); if (cx->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -1448,7 +1448,7 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) gets asserted in time to freeze execution properly. */ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } - hw_perf_restore_ctrl(pctrl); + hw_perf_restore(pctrl); start_critical_timings(); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9a1713a..68f6e3a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -67,7 +67,7 @@ enum perf_counter_record_type { * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - u64 type; + s64 type; u64 irq_period; u32 record_type; @@ -206,8 +206,8 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *task); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); -extern void hw_perf_restore_ctrl(u64 ctrl); -extern u64 hw_perf_disable_all(void); +extern u64 hw_perf_save_disable(void); +extern void hw_perf_restore(u64 ctrl); extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); extern u64 atomic64_counter_read(struct perf_counter *counter); @@ -221,8 +221,8 @@ perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *task) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } -static inline void hw_perf_restore_ctrl(u64 ctrl) { } -static inline u64 hw_perf_disable_all(void) { return 0; } +static inline void hw_perf_restore(u64 ctrl) { } +static inline u64 hw_perf_save_disable(void) { return 0; } #endif #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 506286e..0e93fea 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -43,8 +43,8 @@ hw_perf_counter_init(struct perf_counter *counter) return ERR_PTR(-EINVAL); } -u64 __weak hw_perf_disable_all(void) { return 0; } -void __weak hw_perf_restore_ctrl(u64 ctrl) { } +u64 __weak hw_perf_save_disable(void) { return 0; } +void __weak hw_perf_restore(u64 ctrl) { } void __weak hw_perf_counter_setup(void) { } #if BITS_PER_LONG == 64 @@ -180,9 +180,9 @@ static void __perf_counter_remove_from_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - perf_flags = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); list_del_counter(counter, ctx); - hw_perf_restore_ctrl(perf_flags); + hw_perf_restore(perf_flags); if (!ctx->task) { /* @@ -273,9 +273,9 @@ static void __perf_install_in_context(void *info) * Protect the list operation against NMI by disabling the * counters on a global level. NOP for non NMI based counters. */ - perf_flags = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); list_add_counter(counter, ctx); - hw_perf_restore_ctrl(perf_flags); + hw_perf_restore(perf_flags); ctx->nr_counters++; @@ -495,13 +495,13 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Rotate the first entry last (works just fine for group counters too): */ - perf_flags = hw_perf_disable_all(); + perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { list_del(&counter->list_entry); list_add_tail(&counter->list_entry, &ctx->counter_list); break; } - hw_perf_restore_ctrl(perf_flags); + hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); -- cgit v0.10.2 From bae43c9945ebeef15e7952e317efb02393d3bfc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 14:03:20 +0100 Subject: perf counters: implement PERF_COUNT_TASK_CLOCK Impact: add new perf-counter type The 'task clock' counter counts the amount of time a task is executing, in nanoseconds. It stops ticking when a task is scheduled out either due to it blocking, sleeping or it being preempted. This counter type is a Linux kernel based abstraction, it is available even if the hardware does not support native hardware performance counters. Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 68f6e3a..30c0ec8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -50,8 +50,11 @@ enum hw_event_types { */ PERF_COUNT_CPU_CLOCK = -1, PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, + /* + * Future software events: + */ + /* PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, */ }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0e93fea..a0fe847 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -855,6 +855,25 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { .hw_perf_counter_read = cpu_clock_perf_counter_read, }; +static void task_clock_perf_counter_enable(struct perf_counter *counter) +{ +} + +static void task_clock_perf_counter_disable(struct perf_counter *counter) +{ +} + +static void task_clock_perf_counter_read(struct perf_counter *counter) +{ + atomic64_counter_set(counter, current->se.sum_exec_runtime); +} + +static const struct hw_perf_counter_ops perf_ops_task_clock = { + .hw_perf_counter_enable = task_clock_perf_counter_enable, + .hw_perf_counter_disable = task_clock_perf_counter_disable, + .hw_perf_counter_read = task_clock_perf_counter_read, +}; + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -864,6 +883,9 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_CPU_CLOCK: hw_ops = &perf_ops_cpu_clock; break; + case PERF_COUNT_TASK_CLOCK: + hw_ops = &perf_ops_task_clock; + break; default: break; } -- cgit v0.10.2 From 1d1c7ddbfab358445a542715551301b7fc363e28 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 14:59:31 +0100 Subject: perf counters: add prctl interface to disable/enable counters Add a way for self-monitoring tasks to disable/enable counters summarily, via a prctl: PR_TASK_PERF_COUNTERS_DISABLE 31 PR_TASK_PERF_COUNTERS_ENABLE 32 Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 30c0ec8..97d86c2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -213,6 +213,8 @@ extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); extern u64 atomic64_counter_read(struct perf_counter *counter); +extern int perf_counter_task_disable(void); +extern int perf_counter_task_enable(void); #else static inline void @@ -226,6 +228,8 @@ static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } +static inline int perf_counter_task_disable(void) { return -EINVAL; } +static inline int perf_counter_task_enable(void) { return -EINVAL; } #endif #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e..b00df4c 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -85,4 +85,7 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index a0fe847..4e679b9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -415,6 +415,9 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { + if (counter->active == -1) + return; + counter->hw_ops->hw_perf_counter_enable(counter); counter->active = 1; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ @@ -479,6 +482,79 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) cpuctx->task_ctx = ctx; } +int perf_counter_task_disable(void) +{ + struct task_struct *curr = current; + struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter *counter; + u64 perf_flags; + int cpu; + + if (likely(!ctx->nr_counters)) + return 0; + + local_irq_disable(); + cpu = smp_processor_id(); + + perf_counter_task_sched_out(curr, cpu); + + spin_lock(&ctx->lock); + + /* + * Disable all the counters: + */ + perf_flags = hw_perf_save_disable(); + + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + WARN_ON_ONCE(counter->active == 1); + counter->active = -1; + } + hw_perf_restore(perf_flags); + + spin_unlock(&ctx->lock); + + local_irq_enable(); + + return 0; +} + +int perf_counter_task_enable(void) +{ + struct task_struct *curr = current; + struct perf_counter_context *ctx = &curr->perf_counter_ctx; + struct perf_counter *counter; + u64 perf_flags; + int cpu; + + if (likely(!ctx->nr_counters)) + return 0; + + local_irq_disable(); + cpu = smp_processor_id(); + + spin_lock(&ctx->lock); + + /* + * Disable all the counters: + */ + perf_flags = hw_perf_save_disable(); + + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (counter->active != -1) + continue; + counter->active = 0; + } + hw_perf_restore(perf_flags); + + spin_unlock(&ctx->lock); + + perf_counter_task_sched_in(curr, cpu); + + local_irq_enable(); + + return 0; +} + void perf_counter_task_tick(struct task_struct *curr, int cpu) { struct perf_counter_context *ctx = &curr->perf_counter_ctx; @@ -951,13 +1027,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, * @cpu: target cpu * @group_fd: group leader counter fd */ -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd) - +asmlinkage int +sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, int cpu, int group_fd) { struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; diff --git a/kernel/sys.c b/kernel/sys.c index 31deba8..0f66633 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1716,6 +1717,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; + case PR_TASK_PERF_COUNTERS_DISABLE: + error = perf_counter_task_disable(); + break; + case PR_TASK_PERF_COUNTERS_ENABLE: + error = perf_counter_task_enable(); + break; case PR_GET_TIMERSLACK: error = current->timer_slack_ns; break; -- cgit v0.10.2 From 6a930700c8b655a9e25e42fc4adc0b225ebbcefc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 15:17:03 +0100 Subject: perf counters: clean up state transitions Impact: cleanup Introduce a proper enum for the 3 states of a counter: PERF_COUNTER_STATE_OFF = -1 PERF_COUNTER_STATE_INACTIVE = 0 PERF_COUNTER_STATE_ACTIVE = 1 and rename counter->active to counter->state and propagate the changes everywhere. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3e1dbeb..4854cca 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -332,7 +332,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) * Then store sibling timestamps (if any): */ list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - if (!counter->active) { + if (counter->state != PERF_COUNTER_STATE_ACTIVE) { /* * When counter was not in the overflow mask, we have to * read it from hardware. We read it as well, when it diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 97d86c2..8cb095f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -128,6 +128,15 @@ struct hw_perf_counter_ops { }; /** + * enum perf_counter_active_state - the states of a counter + */ +enum perf_counter_active_state { + PERF_COUNTER_STATE_OFF = -1, + PERF_COUNTER_STATE_INACTIVE = 0, + PERF_COUNTER_STATE_ACTIVE = 1, +}; + +/** * struct perf_counter - performance counter kernel representation: */ struct perf_counter { @@ -136,7 +145,7 @@ struct perf_counter { struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; - int active; + enum perf_counter_active_state state; #if BITS_PER_LONG == 64 atomic64_t count; #else diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4e679b9..559130b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -167,9 +167,9 @@ static void __perf_counter_remove_from_context(void *info) spin_lock(&ctx->lock); - if (counter->active) { + if (counter->state == PERF_COUNTER_STATE_ACTIVE) { counter->hw_ops->hw_perf_counter_disable(counter); - counter->active = 0; + counter->state = PERF_COUNTER_STATE_INACTIVE; ctx->nr_active--; cpuctx->active_oncpu--; counter->task = NULL; @@ -281,7 +281,7 @@ static void __perf_install_in_context(void *info) if (cpuctx->active_oncpu < perf_max_counters) { counter->hw_ops->hw_perf_counter_enable(counter); - counter->active = 1; + counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; ctx->nr_active++; cpuctx->active_oncpu++; @@ -328,7 +328,6 @@ retry: spin_lock_irq(&ctx->lock); /* - * If the context is active and the counter has not been added * we need to retry the smp call. */ if (ctx->nr_active && list_empty(&counter->list_entry)) { @@ -353,12 +352,12 @@ counter_sched_out(struct perf_counter *counter, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx) { - if (!counter->active) + if (counter->state != PERF_COUNTER_STATE_ACTIVE) return; counter->hw_ops->hw_perf_counter_disable(counter); - counter->active = 0; - counter->oncpu = -1; + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->oncpu = -1; cpuctx->active_oncpu--; ctx->nr_active--; @@ -415,11 +414,11 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { - if (counter->active == -1) + if (counter->state == PERF_COUNTER_STATE_OFF) return; counter->hw_ops->hw_perf_counter_enable(counter); - counter->active = 1; + counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ cpuctx->active_oncpu++; @@ -506,8 +505,8 @@ int perf_counter_task_disable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - WARN_ON_ONCE(counter->active == 1); - counter->active = -1; + WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE); + counter->state = PERF_COUNTER_STATE_OFF; } hw_perf_restore(perf_flags); @@ -540,9 +539,9 @@ int perf_counter_task_enable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->active != -1) + if (counter->state != PERF_COUNTER_STATE_OFF) continue; - counter->active = 0; + counter->state = PERF_COUNTER_STATE_INACTIVE; } hw_perf_restore(perf_flags); @@ -620,7 +619,7 @@ static u64 perf_counter_read(struct perf_counter *counter) * If counter is enabled and currently active on a CPU, update the * value in the counter structure: */ - if (counter->active) { + if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, __hw_perf_counter_read, counter, 1); } @@ -673,7 +672,7 @@ static struct perf_data *perf_switch_irq_data(struct perf_counter *counter) retry: spin_lock_irq(&ctx->lock); - if (!counter->active) { + if (counter->state != PERF_COUNTER_STATE_ACTIVE) { counter->irqdata = counter->usrdata; counter->usrdata = oldirqdata; spin_unlock_irq(&ctx->lock); -- cgit v0.10.2 From 447557ac7ce120306b4a31d6003faef39cb1bf14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Dec 2008 20:40:18 +0100 Subject: perf counters: update docs Impact: update docs Signed-off-by: Ingo Molnar diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt index 19033a0..fddd321 100644 --- a/Documentation/perf-counters.txt +++ b/Documentation/perf-counters.txt @@ -10,8 +10,8 @@ trigger interrupts when a threshold number of events have passed - and can thus be used to profile the code that runs on that CPU. The Linux Performance Counter subsystem provides an abstraction of these -hardware capabilities. It provides per task and per CPU counters, and -it provides event capabilities on top of those. +hardware capabilities. It provides per task and per CPU counters, counter +groups, and it provides event capabilities on top of those. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. @@ -19,12 +19,8 @@ There's one file descriptor per virtual counter used. The special file descriptor is opened via the perf_counter_open() system call: - int - perf_counter_open(u32 hw_event_type, - u32 hw_event_period, - u32 record_type, - pid_t pid, - int cpu); + int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, + pid_t pid, int cpu, int group_fd); The syscall returns the new fd. The fd can be used via the normal VFS system calls: read() can be used to read the counter, fcntl() @@ -33,39 +29,78 @@ can be used to set the blocking mode, etc. Multiple counters can be kept open at a time, and the counters can be poll()ed. -When creating a new counter fd, 'hw_event_type' is one of: - - enum hw_event_types { - PERF_COUNT_CYCLES, - PERF_COUNT_INSTRUCTIONS, - PERF_COUNT_CACHE_REFERENCES, - PERF_COUNT_CACHE_MISSES, - PERF_COUNT_BRANCH_INSTRUCTIONS, - PERF_COUNT_BRANCH_MISSES, - }; +When creating a new counter fd, 'perf_counter_hw_event' is: + +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + s64 type; + + u64 irq_period; + u32 record_type; + + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + __reserved_1 : 29; + + u64 __reserved_2; +}; + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + /* + * Future software events: + */ + /* PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, */ +}; These are standardized types of events that work uniformly on all CPUs that implements Performance Counters support under Linux. If a CPU is not able to count branch-misses, then the system call will return -EINVAL. -[ Note: more hw_event_types are supported as well, but they are CPU - specific and are enumerated via /sys on a per CPU basis. Raw hw event - types can be passed in as negative numbers. For example, to count - "External bus cycles while bus lock signal asserted" events on Intel - Core CPUs, pass in a -0x4064 event type value. ] - -The parameter 'hw_event_period' is the number of events before waking up -a read() that is blocked on a counter fd. Zero value means a non-blocking -counter. +More hw_event_types are supported as well, but they are CPU +specific and are enumerated via /sys on a per CPU basis. Raw hw event +types can be passed in under hw_event.type if hw_event.raw is 1. +For example, to count "External bus cycles while bus lock signal asserted" +events on Intel Core CPUs, pass in a 0x4064 event type value and set +hw_event.raw to 1. 'record_type' is the type of data that a read() will provide for the counter, and it can be one of: - enum perf_record_type { - PERF_RECORD_SIMPLE, - PERF_RECORD_IRQ, - }; +/* + * IRQ-notification data record type: + */ +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, +}; a "simple" counter is one that counts hardware events and allows them to be read out into a u64 count value. (read() returns 8 on @@ -76,6 +111,10 @@ the IP of the interrupted context. In this case read() will return the 8-byte counter value, plus the Instruction Pointer address of the interrupted context. +The parameter 'hw_event_period' is the number of events before waking up +a read() that is blocked on a counter fd. Zero value means a non-blocking +counter. + The 'pid' parameter allows the counter to be specific to a task: pid == 0: if the pid parameter is zero, the counter is attached to the @@ -92,7 +131,7 @@ CPU: cpu >= 0: the counter is restricted to a specific CPU cpu == -1: the counter counts on all CPUs -Note: the combination of 'pid == -1' and 'cpu == -1' is not valid. +(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts events of that task and 'follows' that task to whatever CPU the task @@ -102,3 +141,7 @@ their own tasks. A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. +Group counters are created by passing in a group_fd of another counter. +Groups are scheduled at once and can be used with PERF_RECORD_GROUP +to record multi-dimensional timestamps. + -- cgit v0.10.2 From 9b194e831fb2c322ed81a373e49620f34edc2778 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 20:22:35 +0100 Subject: x86: implement atomic64_t on 32-bit Impact: new API Implement the atomic64_t APIs on 32-bit as well. Will be used by the performance counters code. Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index ad5b9f6..9927e01 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -255,5 +255,223 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() +/* An 64bit atomic type */ + +typedef struct { + unsigned long long counter; +} atomic64_t; + +#define ATOMIC64_INIT(val) { (val) } + +/** + * atomic64_read - read atomic64 variable + * @v: pointer of type atomic64_t + * + * Atomically reads the value of @v. + * Doesn't imply a read memory barrier. + */ +#define __atomic64_read(ptr) ((ptr)->counter) + +static inline unsigned long long +cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) +{ + asm volatile( + + LOCK_PREFIX "cmpxchg8b (%[ptr])\n" + + : "=A" (old) + + : [ptr] "D" (ptr), + "A" (old), + "b" (ll_low(new)), + "c" (ll_high(new)) + + : "memory"); + + return old; +} + +static inline unsigned long long +atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, + unsigned long long new_val) +{ + return cmpxchg8b(&ptr->counter, old_val, new_val); +} + +/** + * atomic64_set - set atomic64 variable + * @ptr: pointer to type atomic64_t + * @new_val: value to assign + * + * Atomically sets the value of @ptr to @new_val. + */ +static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) +{ + unsigned long long old_val; + + do { + old_val = atomic_read(ptr); + } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); +} + +/** + * atomic64_read - read atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically reads the value of @ptr and returns it. + */ +static inline unsigned long long atomic64_read(atomic64_t *ptr) +{ + unsigned long long curr_val; + + do { + curr_val = __atomic64_read(ptr); + } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); + + return curr_val; +} + +/** + * atomic64_add_return - add and return + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr and returns @delta + *@ptr + */ +static inline unsigned long long +atomic64_add_return(unsigned long long delta, atomic64_t *ptr) +{ + unsigned long long old_val, new_val; + + do { + old_val = atomic_read(ptr); + new_val = old_val + delta; + + } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); + + return new_val; +} + +static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) +{ + return atomic64_add_return(-delta, ptr); +} + +static inline long atomic64_inc_return(atomic64_t *ptr) +{ + return atomic64_add_return(1, ptr); +} + +static inline long atomic64_dec_return(atomic64_t *ptr) +{ + return atomic64_sub_return(1, ptr); +} + +/** + * atomic64_add - add integer to atomic64 variable + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr. + */ +static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) +{ + atomic64_add_return(delta, ptr); +} + +/** + * atomic64_sub - subtract the atomic64 variable + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_t + * + * Atomically subtracts @delta from @ptr. + */ +static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) +{ + atomic64_add(-delta, ptr); +} + +/** + * atomic64_sub_and_test - subtract value from variable and test result + * @delta: integer value to subtract + * @ptr: pointer to type atomic64_t + * + * Atomically subtracts @delta from @ptr and returns + * true if the result is zero, or false for all + * other cases. + */ +static inline int +atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) +{ + unsigned long long old_val = atomic64_sub_return(delta, ptr); + + return old_val == 0; +} + +/** + * atomic64_inc - increment atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically increments @ptr by 1. + */ +static inline void atomic64_inc(atomic64_t *ptr) +{ + atomic64_add(1, ptr); +} + +/** + * atomic64_dec - decrement atomic64 variable + * @ptr: pointer to type atomic64_t + * + * Atomically decrements @ptr by 1. + */ +static inline void atomic64_dec(atomic64_t *ptr) +{ + atomic64_sub(1, ptr); +} + +/** + * atomic64_dec_and_test - decrement and test + * @ptr: pointer to type atomic64_t + * + * Atomically decrements @ptr by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic64_dec_and_test(atomic64_t *ptr) +{ + return atomic64_sub_and_test(1, ptr); +} + +/** + * atomic64_inc_and_test - increment and test + * @ptr: pointer to type atomic64_t + * + * Atomically increments @ptr by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +static inline int atomic64_inc_and_test(atomic64_t *ptr) +{ + return atomic64_sub_and_test(-1, ptr); +} + +/** + * atomic64_add_negative - add and test if negative + * @delta: integer value to add + * @ptr: pointer to type atomic64_t + * + * Atomically adds @delta to @ptr and returns true + * if the result is negative, or false when + * result is greater than or equal to zero. + */ +static inline int +atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) +{ + long long old_val = atomic64_add_return(delta, ptr); + + return old_val < 0; +} + #include #endif /* _ASM_X86_ATOMIC_32_H */ -- cgit v0.10.2 From ee06094f8279e1312fc0a31591320cc7b6f0ab1e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Dec 2008 09:00:03 +0100 Subject: perfcounters: restructure x86 counter math Impact: restructure code Change counter math from absolute values to clear delta logic. We try to extract elapsed deltas from the raw hw counter - and put that into the generic counter. Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f2fdc18..fe94490 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -643,7 +643,7 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) - select HAVE_PERF_COUNTERS + select HAVE_PERF_COUNTERS if (!M386 && !M486) config X86_IO_APIC def_bool y diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index b903f8d..5afae13 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -54,6 +54,48 @@ const int intel_perfmon_event_map[] = const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); /* + * Propagate counter elapsed time into the generic counter. + * Can only be executed on the CPU where the counter is active. + * Returns the delta events processed. + */ +static void +x86_perf_counter_update(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) +{ + u64 prev_raw_count, new_raw_count, delta; + + WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE); + /* + * Careful: an NMI might modify the previous counter value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic counter atomically: + */ +again: + prev_raw_count = atomic64_read(&hwc->prev_count); + rdmsrl(hwc->counter_base + idx, new_raw_count); + + if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (counter-)time and add that to the generic counter. + * + * Careful, not all hw sign-extends above the physical width + * of the count, so we do that by clipping the delta to 32 bits: + */ + delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count); + WARN_ON_ONCE((int)delta < 0); + + atomic64_add(delta, &counter->count); + atomic64_sub(delta, &hwc->period_left); +} + +/* * Setup the hardware configuration for a given hw_event_type */ static int __hw_perf_counter_init(struct perf_counter *counter) @@ -90,10 +132,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * so we install an artificial 1<<31 period regardless of * the generic counter period: */ - if (!hwc->irq_period) + if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF) hwc->irq_period = 0x7FFFFFFF; - hwc->next_count = -(s32)hwc->irq_period; + atomic64_set(&hwc->period_left, hwc->irq_period); /* * Raw event type provide the config in the event structure @@ -118,12 +160,6 @@ void hw_perf_enable_all(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } -void hw_perf_restore(u64 ctrl) -{ - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); -} -EXPORT_SYMBOL_GPL(hw_perf_restore); - u64 hw_perf_save_disable(void) { u64 ctrl; @@ -134,27 +170,74 @@ u64 hw_perf_save_disable(void) } EXPORT_SYMBOL_GPL(hw_perf_save_disable); +void hw_perf_restore(u64 ctrl) +{ + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); +} +EXPORT_SYMBOL_GPL(hw_perf_restore); + static inline void -__x86_perf_counter_disable(struct hw_perf_counter *hwc, unsigned int idx) +__x86_perf_counter_disable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int idx) { - wrmsr(hwc->config_base + idx, hwc->config, 0); + int err; + + err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); + WARN_ON_ONCE(err); } -static DEFINE_PER_CPU(u64, prev_next_count[MAX_HW_COUNTERS]); +static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]); -static void __hw_perf_counter_set_period(struct hw_perf_counter *hwc, int idx) +/* + * Set the next IRQ period, based on the hwc->period_left value. + * To be called with the counter disabled in hw: + */ +static void +__hw_perf_counter_set_period(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) { - per_cpu(prev_next_count[idx], smp_processor_id()) = hwc->next_count; + s32 left = atomic64_read(&hwc->period_left); + s32 period = hwc->irq_period; + + WARN_ON_ONCE(period <= 0); + + /* + * If we are way outside a reasoable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_set(&hwc->period_left, left); + } - wrmsr(hwc->counter_base + idx, hwc->next_count, 0); + WARN_ON_ONCE(left <= 0); + + per_cpu(prev_left[idx], smp_processor_id()) = left; + + /* + * The hw counter starts counting from this counter offset, + * mark it to be able to extra future deltas: + */ + atomic64_set(&hwc->prev_count, (u64)(s64)-left); + + wrmsr(hwc->counter_base + idx, -left, 0); } -static void __x86_perf_counter_enable(struct hw_perf_counter *hwc, int idx) +static void +__x86_perf_counter_enable(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) { wrmsr(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } +/* + * Find a PMC slot for the freshly enabled / scheduled in counter: + */ static void x86_perf_counter_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -170,55 +253,17 @@ static void x86_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - __x86_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(counter, hwc, idx); cpuc->counters[idx] = counter; - __hw_perf_counter_set_period(hwc, idx); - __x86_perf_counter_enable(hwc, idx); -} - -static void __hw_perf_save_counter(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 raw = -1; - s64 delta; - - /* - * Get the raw hw counter value: - */ - rdmsrl(hwc->counter_base + idx, raw); - - /* - * Rebase it to zero (it started counting at -irq_period), - * to see the delta since ->prev_count: - */ - delta = (s64)hwc->irq_period + (s64)(s32)raw; - - atomic64_counter_set(counter, hwc->prev_count + delta); - - /* - * Adjust the ->prev_count offset - if we went beyond - * irq_period of units, then we got an IRQ and the counter - * was set back to -irq_period: - */ - while (delta >= (s64)hwc->irq_period) { - hwc->prev_count += hwc->irq_period; - delta -= (s64)hwc->irq_period; - } - - /* - * Calculate the next raw counter value we'll write into - * the counter at the next sched-in time: - */ - delta -= (s64)hwc->irq_period; - - hwc->next_count = (s32)delta; + __hw_perf_counter_set_period(counter, hwc, idx); + __x86_perf_counter_enable(counter, hwc, idx); } void perf_counter_print_debug(void) { - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, next_count; + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left; int cpu, idx; if (!nr_hw_counters) @@ -241,14 +286,14 @@ void perf_counter_print_debug(void) rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count); - next_count = per_cpu(prev_next_count[idx], cpu); + prev_left = per_cpu(prev_left[idx], cpu); printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl); printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n", cpu, idx, pmc_count); - printk(KERN_INFO "CPU#%d: PMC%d next: %016llx\n", - cpu, idx, next_count); + printk(KERN_INFO "CPU#%d: PMC%d left: %016llx\n", + cpu, idx, prev_left); } local_irq_enable(); } @@ -259,29 +304,16 @@ static void x86_perf_counter_disable(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __x86_perf_counter_disable(hwc, idx); + __x86_perf_counter_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; - __hw_perf_save_counter(counter, hwc, idx); -} -static void x86_perf_counter_read(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - unsigned long addr = hwc->counter_base + hwc->idx; - s64 offs, val = -1LL; - s32 val32; - - /* Careful: NMI might modify the counter offset */ - do { - offs = hwc->prev_count; - rdmsrl(addr, val); - } while (offs != hwc->prev_count); - - val32 = (s32) val; - val = (s64)hwc->irq_period + (s64)val32; - atomic64_counter_set(counter, hwc->prev_count + val); + /* + * Drain the remaining delta count out of a counter + * that we are disabling: + */ + x86_perf_counter_update(counter, hwc, idx); } static void perf_store_irq_data(struct perf_counter *counter, u64 data) @@ -299,7 +331,8 @@ static void perf_store_irq_data(struct perf_counter *counter, u64 data) } /* - * NMI-safe enable method: + * Save and restart an expired counter. Called by NMI contexts, + * so it has to be careful about preempting normal counter ops: */ static void perf_save_and_restart(struct perf_counter *counter) { @@ -309,45 +342,25 @@ static void perf_save_and_restart(struct perf_counter *counter) rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); - __hw_perf_save_counter(counter, hwc, idx); - __hw_perf_counter_set_period(hwc, idx); + x86_perf_counter_update(counter, hwc, idx); + __hw_perf_counter_set_period(counter, hwc, idx); if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) - __x86_perf_counter_enable(hwc, idx); + __x86_perf_counter_enable(counter, hwc, idx); } static void perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) { struct perf_counter *counter, *group_leader = sibling->group_leader; - int bit; - - /* - * Store the counter's own timestamp first: - */ - perf_store_irq_data(sibling, sibling->hw_event.type); - perf_store_irq_data(sibling, atomic64_counter_read(sibling)); /* - * Then store sibling timestamps (if any): + * Store sibling timestamps (if any): */ list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) { - /* - * When counter was not in the overflow mask, we have to - * read it from hardware. We read it as well, when it - * has not been read yet and clear the bit in the - * status mask. - */ - bit = counter->hw.idx; - if (!test_bit(bit, (unsigned long *) overflown) || - test_bit(bit, (unsigned long *) status)) { - clear_bit(bit, (unsigned long *) status); - perf_save_and_restart(counter); - } - } + x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); perf_store_irq_data(sibling, counter->hw_event.type); - perf_store_irq_data(sibling, atomic64_counter_read(counter)); + perf_store_irq_data(sibling, atomic64_read(&counter->count)); } } @@ -540,6 +553,11 @@ void __init init_hw_perf_counters(void) perf_counters_initialized = true; } +static void x86_perf_counter_read(struct perf_counter *counter) +{ + x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); +} + static const struct hw_perf_counter_ops x86_perf_counter_ops = { .hw_perf_counter_enable = x86_perf_counter_enable, .hw_perf_counter_disable = x86_perf_counter_disable, diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8cb095f..7246028 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -91,14 +91,16 @@ struct perf_counter_hw_event { * struct hw_perf_counter - performance counter hardware details: */ struct hw_perf_counter { +#ifdef CONFIG_PERF_COUNTERS u64 config; unsigned long config_base; unsigned long counter_base; int nmi; unsigned int idx; - u64 prev_count; + atomic64_t prev_count; u64 irq_period; - s32 next_count; + atomic64_t period_left; +#endif }; /* @@ -140,17 +142,15 @@ enum perf_counter_active_state { * struct perf_counter - performance counter kernel representation: */ struct perf_counter { +#ifdef CONFIG_PERF_COUNTERS struct list_head list_entry; struct list_head sibling_list; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; enum perf_counter_active_state state; -#if BITS_PER_LONG == 64 atomic64_t count; -#else - atomic_t count32[2]; -#endif + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; @@ -172,6 +172,7 @@ struct perf_counter { struct perf_data *irqdata; struct perf_data *usrdata; struct perf_data data[2]; +#endif }; /** @@ -220,8 +221,6 @@ extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); -extern void atomic64_counter_set(struct perf_counter *counter, u64 val64); -extern u64 atomic64_counter_read(struct perf_counter *counter); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 559130b..416861c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -44,67 +44,9 @@ hw_perf_counter_init(struct perf_counter *counter) } u64 __weak hw_perf_save_disable(void) { return 0; } -void __weak hw_perf_restore(u64 ctrl) { } +void __weak hw_perf_restore(u64 ctrl) { } void __weak hw_perf_counter_setup(void) { } -#if BITS_PER_LONG == 64 - -/* - * Read the cached counter in counter safe against cross CPU / NMI - * modifications. 64 bit version - no complications. - */ -static inline u64 perf_counter_read_safe(struct perf_counter *counter) -{ - return (u64) atomic64_read(&counter->count); -} - -void atomic64_counter_set(struct perf_counter *counter, u64 val) -{ - atomic64_set(&counter->count, val); -} - -u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic64_read(&counter->count); -} - -#else - -/* - * Read the cached counter in counter safe against cross CPU / NMI - * modifications. 32 bit version. - */ -static u64 perf_counter_read_safe(struct perf_counter *counter) -{ - u32 cntl, cnth; - - local_irq_disable(); - do { - cnth = atomic_read(&counter->count32[1]); - cntl = atomic_read(&counter->count32[0]); - } while (cnth != atomic_read(&counter->count32[1])); - - local_irq_enable(); - - return cntl | ((u64) cnth) << 32; -} - -void atomic64_counter_set(struct perf_counter *counter, u64 val64) -{ - u32 *val32 = (void *)&val64; - - atomic_set(counter->count32 + 0, *(val32 + 0)); - atomic_set(counter->count32 + 1, *(val32 + 1)); -} - -u64 atomic64_counter_read(struct perf_counter *counter) -{ - return atomic_read(counter->count32 + 0) | - (u64) atomic_read(counter->count32 + 1) << 32; -} - -#endif - static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { @@ -280,11 +222,11 @@ static void __perf_install_in_context(void *info) ctx->nr_counters++; if (cpuctx->active_oncpu < perf_max_counters) { - counter->hw_ops->hw_perf_counter_enable(counter); counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; ctx->nr_active++; cpuctx->active_oncpu++; + counter->hw_ops->hw_perf_counter_enable(counter); } if (!ctx->task && cpuctx->max_pertask) @@ -624,7 +566,7 @@ static u64 perf_counter_read(struct perf_counter *counter) __hw_perf_counter_read, counter, 1); } - return perf_counter_read_safe(counter); + return atomic64_read(&counter->count); } /* @@ -921,7 +863,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter) { int cpu = raw_smp_processor_id(); - atomic64_counter_set(counter, cpu_clock(cpu)); + atomic64_set(&counter->count, cpu_clock(cpu)); } static const struct hw_perf_counter_ops perf_ops_cpu_clock = { @@ -940,7 +882,7 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter) static void task_clock_perf_counter_read(struct perf_counter *counter) { - atomic64_counter_set(counter, current->se.sum_exec_runtime); + atomic64_set(&counter->count, current->se.sum_exec_runtime); } static const struct hw_perf_counter_ops perf_ops_task_clock = { -- cgit v0.10.2 From 9b51f66dcb09ac5eb6bc68fc111d5c7a1e0131d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 12 Dec 2008 13:49:45 +0100 Subject: perfcounters: implement "counter inheritance" Impact: implement new performance feature Counter inheritance can be used to run performance counters in a workload, transparently - and pipe back the counter results to the parent counter. Inheritance for performance counters works the following way: when creating a counter it can be marked with the .inherit=1 flag. Such counters are then 'inherited' by all child tasks (be they fork()-ed or clone()-ed). These counters get inherited through exec() boundaries as well (except through setuid boundaries). The counter values get added back to the parent counter(s) when the child task(s) exit - much like stime/utime statistics are gathered. So inherited counters are ideal to gather summary statistics about an application's behavior via shell commands, without having to modify that application. The timec.c command utilizes counter inheritance: http://redhat.com/~mingo/perfcounters/timec.c Sample output: $ ./timec -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null Performance counter stats for 'ls': 163516953 instructions 2295 cache-misses 2855182 branch-misses Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7246028..e5d25bf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -75,10 +75,11 @@ struct perf_counter_hw_event { u64 irq_period; u32 record_type; - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - __reserved_1 : 29; + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + __reserved_1 : 28; u64 __reserved_2; }; @@ -138,6 +139,8 @@ enum perf_counter_active_state { PERF_COUNTER_STATE_ACTIVE = 1, }; +struct file; + /** * struct perf_counter - performance counter kernel representation: */ @@ -156,7 +159,10 @@ struct perf_counter { struct perf_counter_context *ctx; struct task_struct *task; + struct file *filp; + unsigned int nr_inherited; + struct perf_counter *parent; /* * Protect attach/detach: */ @@ -210,13 +216,16 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS +extern void +perf_counter_show(struct perf_counter *counter, char *str, int trace); extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern void perf_counter_init_task(struct task_struct *task); +extern void perf_counter_init_task(struct task_struct *child); +extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); extern u64 hw_perf_save_disable(void); @@ -226,12 +235,15 @@ extern int perf_counter_task_enable(void); #else static inline void +perf_counter_show(struct perf_counter *counter, char *str, int trace) { } +static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void perf_counter_task_sched_out(struct task_struct *task, int cpu) { } static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline void perf_counter_init_task(struct task_struct *task) { } +static inline void perf_counter_init_task(struct task_struct *child) { } +static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void hw_perf_restore(u64 ctrl) { } diff --git a/kernel/exit.c b/kernel/exit.c index 2d8be7e..d336c90 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1093,11 +1093,12 @@ NORET_TYPE void do_exit(long code) mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; #endif -#ifdef CONFIG_FUTEX /* - * This must happen late, after the PID is not - * hashed anymore: + * These must happen late, after the PID is not + * hashed anymore, but still at a point that may sleep: */ + perf_counter_exit_task(tsk); +#ifdef CONFIG_FUTEX if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); if (unlikely(current->pi_state_cache)) diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 416861c..f5e81dd 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -80,8 +80,6 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_del_init(&sibling->list_entry); list_add_tail(&sibling->list_entry, &ctx->counter_list); - WARN_ON_ONCE(!sibling->group_leader); - WARN_ON_ONCE(sibling->group_leader == sibling); sibling->group_leader = sibling; } } @@ -97,6 +95,7 @@ static void __perf_counter_remove_from_context(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; + unsigned long flags; u64 perf_flags; /* @@ -107,7 +106,7 @@ static void __perf_counter_remove_from_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { counter->hw_ops->hw_perf_counter_disable(counter); @@ -136,7 +135,7 @@ static void __perf_counter_remove_from_context(void *info) perf_max_counters - perf_reserved_percpu); } - spin_unlock(&ctx->lock); + spin_unlock_irqrestore(&ctx->lock, flags); } @@ -199,6 +198,7 @@ static void __perf_install_in_context(void *info) struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; int cpu = smp_processor_id(); + unsigned long flags; u64 perf_flags; /* @@ -209,7 +209,7 @@ static void __perf_install_in_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); /* * Protect the list operation against NMI by disabling the @@ -232,7 +232,7 @@ static void __perf_install_in_context(void *info) if (!ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; - spin_unlock(&ctx->lock); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -446,10 +446,9 @@ int perf_counter_task_disable(void) */ perf_flags = hw_perf_save_disable(); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - WARN_ON_ONCE(counter->state == PERF_COUNTER_STATE_ACTIVE); + list_for_each_entry(counter, &ctx->counter_list, list_entry) counter->state = PERF_COUNTER_STATE_OFF; - } + hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); @@ -526,26 +525,6 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) } /* - * Initialize the perf_counter context in a task_struct: - */ -static void -__perf_counter_init_context(struct perf_counter_context *ctx, - struct task_struct *task) -{ - spin_lock_init(&ctx->lock); - INIT_LIST_HEAD(&ctx->counter_list); - ctx->nr_counters = 0; - ctx->task = task; -} -/* - * Initialize the perf_counter context in task_struct - */ -void perf_counter_init_task(struct task_struct *task) -{ - __perf_counter_init_context(&task->perf_counter_ctx, task); -} - -/* * Cross CPU call to read the hardware counter */ static void __hw_perf_counter_read(void *info) @@ -663,7 +642,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) cpuctx = &per_cpu(perf_cpu_context, cpu); ctx = &cpuctx->ctx; - WARN_ON_ONCE(ctx->task); return ctx; } @@ -915,12 +893,13 @@ sw_perf_counter_init(struct perf_counter *counter) static struct perf_counter * perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, - struct perf_counter *group_leader) + struct perf_counter *group_leader, + gfp_t gfpflags) { const struct hw_perf_counter_ops *hw_ops; struct perf_counter *counter; - counter = kzalloc(sizeof(*counter), GFP_KERNEL); + counter = kzalloc(sizeof(*counter), gfpflags); if (!counter) return NULL; @@ -947,9 +926,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, hw_ops = NULL; if (!hw_event->raw && hw_event->type < 0) hw_ops = sw_perf_counter_init(counter); - if (!hw_ops) { + if (!hw_ops) hw_ops = hw_perf_counter_init(counter); - } if (!hw_ops) { kfree(counter); @@ -975,8 +953,10 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; struct perf_counter_context *ctx; + struct file *counter_file = NULL; struct file *group_file = NULL; int fput_needed = 0; + int fput_needed2 = 0; int ret; if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) @@ -1017,25 +997,29 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, } ret = -EINVAL; - counter = perf_counter_alloc(&hw_event, cpu, group_leader); + counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL); if (!counter) goto err_put_context; - perf_install_in_context(ctx, counter, cpu); - ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); if (ret < 0) - goto err_remove_free_put_context; + goto err_free_put_context; + + counter_file = fget_light(ret, &fput_needed2); + if (!counter_file) + goto err_free_put_context; + + counter->filp = counter_file; + perf_install_in_context(ctx, counter, cpu); + + fput_light(counter_file, fput_needed2); out_fput: fput_light(group_file, fput_needed); return ret; -err_remove_free_put_context: - mutex_lock(&counter->mutex); - perf_counter_remove_from_context(counter); - mutex_unlock(&counter->mutex); +err_free_put_context: kfree(counter); err_put_context: @@ -1044,6 +1028,186 @@ err_put_context: goto out_fput; } +/* + * Initialize the perf_counter context in a task_struct: + */ +static void +__perf_counter_init_context(struct perf_counter_context *ctx, + struct task_struct *task) +{ + memset(ctx, 0, sizeof(*ctx)); + spin_lock_init(&ctx->lock); + INIT_LIST_HEAD(&ctx->counter_list); + ctx->task = task; +} + +/* + * inherit a counter from parent task to child task: + */ +static int +inherit_counter(struct perf_counter *parent_counter, + struct task_struct *parent, + struct perf_counter_context *parent_ctx, + struct task_struct *child, + struct perf_counter_context *child_ctx) +{ + struct perf_counter *child_counter; + + child_counter = perf_counter_alloc(&parent_counter->hw_event, + parent_counter->cpu, NULL, + GFP_ATOMIC); + if (!child_counter) + return -ENOMEM; + + /* + * Link it up in the child's context: + */ + child_counter->ctx = child_ctx; + child_counter->task = child; + list_add_counter(child_counter, child_ctx); + child_ctx->nr_counters++; + + child_counter->parent = parent_counter; + parent_counter->nr_inherited++; + /* + * inherit into child's child as well: + */ + child_counter->hw_event.inherit = 1; + + /* + * Get a reference to the parent filp - we will fput it + * when the child counter exits. This is safe to do because + * we are in the parent and we know that the filp still + * exists and has a nonzero count: + */ + atomic_long_inc(&parent_counter->filp->f_count); + + return 0; +} + +static void +__perf_counter_exit_task(struct task_struct *child, + struct perf_counter *child_counter, + struct perf_counter_context *child_ctx) +{ + struct perf_counter *parent_counter; + u64 parent_val, child_val; + u64 perf_flags; + + /* + * Disable and unlink this counter. + * + * Be careful about zapping the list - IRQ/NMI context + * could still be processing it: + */ + local_irq_disable(); + perf_flags = hw_perf_save_disable(); + + if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) + child_counter->hw_ops->hw_perf_counter_disable(child_counter); + list_del_init(&child_counter->list_entry); + + hw_perf_restore(perf_flags); + local_irq_enable(); + + parent_counter = child_counter->parent; + /* + * It can happen that parent exits first, and has counters + * that are still around due to the child reference. These + * counters need to be zapped - but otherwise linger. + */ + if (!parent_counter) + return; + + parent_val = atomic64_read(&parent_counter->count); + child_val = atomic64_read(&child_counter->count); + + /* + * Add back the child's count to the parent's count: + */ + atomic64_add(child_val, &parent_counter->count); + + fput(parent_counter->filp); + + kfree(child_counter); +} + +/* + * When a child task exist, feed back counter values to parent counters. + * + * Note: we are running in child context, but the PID is not hashed + * anymore so new counters will not be added. + */ +void perf_counter_exit_task(struct task_struct *child) +{ + struct perf_counter *child_counter, *tmp; + struct perf_counter_context *child_ctx; + + child_ctx = &child->perf_counter_ctx; + + if (likely(!child_ctx->nr_counters)) + return; + + list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, + list_entry) + __perf_counter_exit_task(child, child_counter, child_ctx); +} + +/* + * Initialize the perf_counter context in task_struct + */ +void perf_counter_init_task(struct task_struct *child) +{ + struct perf_counter_context *child_ctx, *parent_ctx; + struct perf_counter *counter, *parent_counter; + struct task_struct *parent = current; + unsigned long flags; + + child_ctx = &child->perf_counter_ctx; + parent_ctx = &parent->perf_counter_ctx; + + __perf_counter_init_context(child_ctx, child); + + /* + * This is executed from the parent task context, so inherit + * counters that have been marked for cloning: + */ + + if (likely(!parent_ctx->nr_counters)) + return; + + /* + * Lock the parent list. No need to lock the child - not PID + * hashed yet and not running, so nobody can access it. + */ + spin_lock_irqsave(&parent_ctx->lock, flags); + + /* + * We dont have to disable NMIs - we are only looking at + * the list, not manipulating it: + */ + list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { + if (!counter->hw_event.inherit || counter->group_leader != counter) + continue; + + /* + * Instead of creating recursive hierarchies of counters, + * we link inheritd counters back to the original parent, + * which has a filp for sure, which we use as the reference + * count: + */ + parent_counter = counter; + if (counter->parent) + parent_counter = counter->parent; + + if (inherit_counter(parent_counter, parent, + parent_ctx, child, child_ctx)) + break; + } + + spin_unlock_irqrestore(&parent_ctx->lock, flags); +} + static void __cpuinit perf_counter_init_cpu(int cpu) { struct perf_cpu_context *cpuctx; -- cgit v0.10.2 From 8cb391e8786c8072367f0aeb90551903fef074ba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 12:22:31 +0100 Subject: perfcounters: fix task clock counter Impact: bugfix Update the task clock counter to the new math. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f5e81dd..1f81cde 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -850,17 +850,36 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { .hw_perf_counter_read = cpu_clock_perf_counter_read, }; -static void task_clock_perf_counter_enable(struct perf_counter *counter) +static void task_clock_perf_counter_update(struct perf_counter *counter) { + u64 prev, now; + s64 delta; + + prev = atomic64_read(&counter->hw.prev_count); + now = current->se.sum_exec_runtime; + + atomic64_set(&counter->hw.prev_count, now); + + delta = now - prev; + if (WARN_ON_ONCE(delta < 0)) + delta = 0; + + atomic64_add(delta, &counter->count); } -static void task_clock_perf_counter_disable(struct perf_counter *counter) +static void task_clock_perf_counter_read(struct perf_counter *counter) { + task_clock_perf_counter_update(counter); } -static void task_clock_perf_counter_read(struct perf_counter *counter) +static void task_clock_perf_counter_enable(struct perf_counter *counter) +{ + atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime); +} + +static void task_clock_perf_counter_disable(struct perf_counter *counter) { - atomic64_set(&counter->count, current->se.sum_exec_runtime); + task_clock_perf_counter_update(counter); } static const struct hw_perf_counter_ops perf_ops_task_clock = { -- cgit v0.10.2 From 5d6a27d8a096868ae313f71f563b06074a7e34fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 12:28:33 +0100 Subject: perfcounters: add context switch counter Impact: add new feature, new sw counter Add a counter that counts the number of context-switches a task is doing. Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e5d25bf..d2a1656 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -53,8 +53,8 @@ enum hw_event_types { /* * Future software events: */ - /* PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, */ + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1f81cde..0928709 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -888,6 +888,54 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .hw_perf_counter_read = task_clock_perf_counter_read, }; +static u64 get_context_switches(void) +{ + struct task_struct *curr = current; + + return curr->nvcsw + curr->nivcsw; +} + +static void context_switches_perf_counter_update(struct perf_counter *counter) +{ + u64 prev, now; + s64 delta; + + prev = atomic64_read(&counter->hw.prev_count); + now = get_context_switches(); + + atomic64_set(&counter->hw.prev_count, now); + + delta = now - prev; + if (WARN_ON_ONCE(delta < 0)) + delta = 0; + + atomic64_add(delta, &counter->count); +} + +static void context_switches_perf_counter_read(struct perf_counter *counter) +{ + context_switches_perf_counter_update(counter); +} + +static void context_switches_perf_counter_enable(struct perf_counter *counter) +{ + /* + * ->nvcsw + curr->nivcsw is a per-task value already, + * so we dont have to clear it on switch-in. + */ +} + +static void context_switches_perf_counter_disable(struct perf_counter *counter) +{ + context_switches_perf_counter_update(counter); +} + +static const struct hw_perf_counter_ops perf_ops_context_switches = { + .hw_perf_counter_enable = context_switches_perf_counter_enable, + .hw_perf_counter_disable = context_switches_perf_counter_disable, + .hw_perf_counter_read = context_switches_perf_counter_read, +}; + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -900,6 +948,9 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_TASK_CLOCK: hw_ops = &perf_ops_task_clock; break; + case PERF_COUNT_CONTEXT_SWITCHES: + hw_ops = &perf_ops_context_switches; + break; default: break; } -- cgit v0.10.2 From 6c594c21fcb02c662f11c97be4d7d2b73060a205 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 12:34:15 +0100 Subject: perfcounters: add task migrations counter Impact: add new feature, new sw counter Add a counter that counts the number of cross-CPU migrations a task is suffering. Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d2a1656..f30486f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -42,6 +42,8 @@ enum hw_event_types { PERF_COUNT_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_BRANCH_MISSES = 5, + PERF_HW_EVENTS_MAX = 6, + /* * Special "software" counters provided by the kernel, even if * the hardware does not support performance counters. These @@ -50,11 +52,11 @@ enum hw_event_types { */ PERF_COUNT_CPU_CLOCK = -1, PERF_COUNT_TASK_CLOCK = -2, - /* - * Future software events: - */ PERF_COUNT_PAGE_FAULTS = -3, PERF_COUNT_CONTEXT_SWITCHES = -4, + PERF_COUNT_CPU_MIGRATIONS = -5, + + PERF_SW_EVENTS_MIN = -6, }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 4c53027..2e15be8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1014,6 +1014,8 @@ struct sched_entity { u64 last_wakeup; u64 avg_overlap; + u64 nr_migrations; + #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; @@ -1029,7 +1031,6 @@ struct sched_entity { u64 exec_max; u64 slice_max; - u64 nr_migrations; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; u64 nr_failed_migrations_running; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0928709..fb11e35 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -936,6 +936,52 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = { .hw_perf_counter_read = context_switches_perf_counter_read, }; +static inline u64 get_cpu_migrations(void) +{ + return current->se.nr_migrations; +} + +static void cpu_migrations_perf_counter_update(struct perf_counter *counter) +{ + u64 prev, now; + s64 delta; + + prev = atomic64_read(&counter->hw.prev_count); + now = get_cpu_migrations(); + + atomic64_set(&counter->hw.prev_count, now); + + delta = now - prev; + if (WARN_ON_ONCE(delta < 0)) + delta = 0; + + atomic64_add(delta, &counter->count); +} + +static void cpu_migrations_perf_counter_read(struct perf_counter *counter) +{ + cpu_migrations_perf_counter_update(counter); +} + +static void cpu_migrations_perf_counter_enable(struct perf_counter *counter) +{ + /* + * se.nr_migrations is a per-task value already, + * so we dont have to clear it on switch-in. + */ +} + +static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) +{ + cpu_migrations_perf_counter_update(counter); +} + +static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { + .hw_perf_counter_enable = cpu_migrations_perf_counter_enable, + .hw_perf_counter_disable = cpu_migrations_perf_counter_disable, + .hw_perf_counter_read = cpu_migrations_perf_counter_read, +}; + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -951,6 +997,9 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_CONTEXT_SWITCHES: hw_ops = &perf_ops_context_switches; break; + case PERF_COUNT_CPU_MIGRATIONS: + hw_ops = &perf_ops_cpu_migrations; + break; default: break; } diff --git a/kernel/sched.c b/kernel/sched.c index 5c3f410..382cfdb 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1852,12 +1852,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) p->se.sleep_start -= clock_offset; if (p->se.block_start) p->se.block_start -= clock_offset; +#endif if (old_cpu != new_cpu) { - schedstat_inc(p, se.nr_migrations); + p->se.nr_migrations++; +#ifdef CONFIG_SCHEDSTATS if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); - } #endif + } p->se.vruntime -= old_cfsrq->min_vruntime - new_cfsrq->min_vruntime; @@ -2375,6 +2377,7 @@ static void __sched_fork(struct task_struct *p) p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; + p->se.nr_migrations = 0; p->se.last_wakeup = 0; p->se.avg_overlap = 0; -- cgit v0.10.2 From e06c61a879910869aa5bf3f8f634abfee1a7bebc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 14:44:31 +0100 Subject: perfcounters: add nr-of-faults counter Impact: add new feature, new sw counter Add a counter that counts the number of pagefaults a task is experiencing. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index fb11e35..59c52f9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -888,6 +888,54 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .hw_perf_counter_read = task_clock_perf_counter_read, }; +static u64 get_page_faults(void) +{ + struct task_struct *curr = current; + + return curr->maj_flt + curr->min_flt; +} + +static void page_faults_perf_counter_update(struct perf_counter *counter) +{ + u64 prev, now; + s64 delta; + + prev = atomic64_read(&counter->hw.prev_count); + now = get_page_faults(); + + atomic64_set(&counter->hw.prev_count, now); + + delta = now - prev; + if (WARN_ON_ONCE(delta < 0)) + delta = 0; + + atomic64_add(delta, &counter->count); +} + +static void page_faults_perf_counter_read(struct perf_counter *counter) +{ + page_faults_perf_counter_update(counter); +} + +static void page_faults_perf_counter_enable(struct perf_counter *counter) +{ + /* + * page-faults is a per-task value already, + * so we dont have to clear it on switch-in. + */ +} + +static void page_faults_perf_counter_disable(struct perf_counter *counter) +{ + page_faults_perf_counter_update(counter); +} + +static const struct hw_perf_counter_ops perf_ops_page_faults = { + .hw_perf_counter_enable = page_faults_perf_counter_enable, + .hw_perf_counter_disable = page_faults_perf_counter_disable, + .hw_perf_counter_read = page_faults_perf_counter_read, +}; + static u64 get_context_switches(void) { struct task_struct *curr = current; @@ -994,6 +1042,9 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_TASK_CLOCK: hw_ops = &perf_ops_task_clock; break; + case PERF_COUNT_PAGE_FAULTS: + hw_ops = &perf_ops_page_faults; + break; case PERF_COUNT_CONTEXT_SWITCHES: hw_ops = &perf_ops_context_switches; break; -- cgit v0.10.2 From 2b9ff0db19b5e2c77000b7201525f9c3d6e8328d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 18:36:30 +0100 Subject: perfcounters: fix non-intel-perfmon CPUs Do not write MSR_CORE_PERF_GLOBAL_CTRL on CPUs where it does not exist. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 5afae13..6d30f60 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -157,6 +157,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter) void hw_perf_enable_all(void) { + if (unlikely(!perf_counters_initialized)) + return; + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); } @@ -164,14 +167,21 @@ u64 hw_perf_save_disable(void) { u64 ctrl; + if (unlikely(!perf_counters_initialized)) + return 0; + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + return ctrl; } EXPORT_SYMBOL_GPL(hw_perf_save_disable); void hw_perf_restore(u64 ctrl) { + if (unlikely(!perf_counters_initialized)) + return; + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); } EXPORT_SYMBOL_GPL(hw_perf_restore); -- cgit v0.10.2 From 088e2852c858159d47f71ee8da38e0fb1b21f806 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 20:21:00 +0100 Subject: perfcounters, x86: fix sw counters on non-PMC CPUs Make perf_max_counters default to at least 1 - this allows the sw counters to be used. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 59c52f9..539fa82 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -25,7 +25,7 @@ */ DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); -int perf_max_counters __read_mostly; +int perf_max_counters __read_mostly = 1; static int perf_reserved_percpu __read_mostly; static int perf_overcommit __read_mostly = 1; -- cgit v0.10.2 From 75f224cf7700ed6006574dc3f2efa29860727570 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 21:58:46 +0100 Subject: perfcounters: fix lapic initialization Fix non-working NMI sampling in certain bootup scenarios. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6d30f60..8a154bd 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -557,10 +557,10 @@ void __init init_hw_perf_counters(void) printk(KERN_INFO "... bit_width: %d\n", eax.split.bit_width); printk(KERN_INFO "... mask_length: %d\n", eax.split.mask_length); + perf_counters_initialized = true; + perf_counters_lapic_init(0); register_die_notifier(&perf_counter_nmi_notifier); - - perf_counters_initialized = true; } static void x86_perf_counter_read(struct perf_counter *counter) -- cgit v0.10.2 From 0cc0c027d4e028632933f1be2dc4cd730358183b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Dec 2008 23:20:36 +0100 Subject: perfcounters: release CPU context when exiting task counters If counters are exiting via do_exit() not via filp close, then the CPU context needs to be released - otherwise future percpu counter creations might fail. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 539fa82..16396e9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1273,8 +1273,19 @@ __perf_counter_exit_task(struct task_struct *child, local_irq_disable(); perf_flags = hw_perf_save_disable(); - if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) + if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { + struct perf_cpu_context *cpuctx; + + cpuctx = &__get_cpu_var(perf_cpu_context); + child_counter->hw_ops->hw_perf_counter_disable(child_counter); + child_counter->state = PERF_COUNTER_STATE_INACTIVE; + child_counter->oncpu = -1; + + cpuctx->active_oncpu--; + child_ctx->nr_active--; + } + list_del_init(&child_counter->list_entry); hw_perf_restore(perf_flags); @@ -1539,4 +1550,3 @@ static int __init perf_counter_sysfs_init(void) &perfclass_attr_group); } device_initcall(perf_counter_sysfs_init); - -- cgit v0.10.2 From f65cb45cba63f249458b669aa67069eabc37b2f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 16 Dec 2008 13:40:44 +0100 Subject: perfcounters: flush on setuid exec Pavel Machek pointed out that performance counters should be flushed when crossing protection domains on setuid execution. Reported-by: Pavel Machek Acked-by: Pavel Machek Signed-off-by: Ingo Molnar diff --git a/fs/exec.c b/fs/exec.c index ec5df9a..d5165d8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -1017,6 +1018,13 @@ int flush_old_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } + /* + * Flush performance counters when crossing a + * security domain: + */ + if (!get_dumpable(current->mm)) + perf_counter_exit_task(current); + /* An exec changes our domain. We are no longer part of the thread group */ -- cgit v0.10.2 From a86ed50859d65a08beec9474df97b88438a996df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 00:43:10 +0100 Subject: perfcounters: use hw_event.disable flag Impact: implement default-off counters Make sure that counters that are created with counter.hw_event.disabled=1, get created in disabled state. They can be enabled via: prctl(PR_TASK_PERF_COUNTERS_ENABLE); Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 16396e9..5431e79 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1093,6 +1093,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->group_leader = group_leader; counter->hw_ops = NULL; + if (hw_event->disabled) + counter->state = PERF_COUNTER_STATE_OFF; + hw_ops = NULL; if (!hw_event->raw && hw_event->type < 0) hw_ops = sw_perf_counter_init(counter); -- cgit v0.10.2 From 94c46572a6d9bb497eda0a14099d9f1360d57d5d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Fri, 19 Dec 2008 22:37:58 +0530 Subject: x86: perf_counter.c intel_perfmon_event_map and max_intel_perfmon_events should be static Impact: cleanup, avoid sparse warnings, reduce kernel size a bit Fixes these sparse warnings: arch/x86/kernel/cpu/perf_counter.c:44:11: warning: symbol 'intel_perfmon_event_map' was not declared. Should it be static? arch/x86/kernel/cpu/perf_counter.c:54:11: warning: symbol 'max_intel_perfmon_events' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 8a154bd..bdbdb56 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -41,7 +41,7 @@ struct cpu_hw_counters { */ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); -const int intel_perfmon_event_map[] = +static const int intel_perfmon_event_map[] = { [PERF_COUNT_CYCLES] = 0x003c, [PERF_COUNT_INSTRUCTIONS] = 0x00c0, @@ -51,7 +51,7 @@ const int intel_perfmon_event_map[] = [PERF_COUNT_BRANCH_MISSES] = 0x00c5, }; -const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); +static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); /* * Propagate counter elapsed time into the generic counter. -- cgit v0.10.2 From 8fb9331391af95ca1f4e5c0a0da8120b13cbae01 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:04:16 +0100 Subject: perfcounters: remove warnings Impact: remove debug checks Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index bdbdb56..89fad5d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -64,7 +64,6 @@ x86_perf_counter_update(struct perf_counter *counter, { u64 prev_raw_count, new_raw_count, delta; - WARN_ON_ONCE(counter->state != PERF_COUNTER_STATE_ACTIVE); /* * Careful: an NMI might modify the previous counter value. * @@ -89,7 +88,6 @@ again: * of the count, so we do that by clipping the delta to 32 bits: */ delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count); - WARN_ON_ONCE((int)delta < 0); atomic64_add(delta, &counter->count); atomic64_sub(delta, &hwc->period_left); @@ -193,7 +191,6 @@ __x86_perf_counter_disable(struct perf_counter *counter, int err; err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); - WARN_ON_ONCE(err); } static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]); @@ -209,8 +206,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter, s32 left = atomic64_read(&hwc->period_left); s32 period = hwc->irq_period; - WARN_ON_ONCE(period <= 0); - /* * If we are way outside a reasoable range then just skip forward: */ @@ -224,8 +219,6 @@ __hw_perf_counter_set_period(struct perf_counter *counter, atomic64_set(&hwc->period_left, left); } - WARN_ON_ONCE(left <= 0); - per_cpu(prev_left[idx], smp_processor_id()) = left; /* diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f30486f..d038450 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -218,8 +218,6 @@ struct perf_cpu_context { extern int perf_max_counters; #ifdef CONFIG_PERF_COUNTERS -extern void -perf_counter_show(struct perf_counter *counter, char *str, int trace); extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); @@ -237,8 +235,6 @@ extern int perf_counter_task_enable(void); #else static inline void -perf_counter_show(struct perf_counter *counter, char *str, int trace) { } -static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } static inline void perf_counter_task_sched_out(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 5431e79..aab6c12 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -861,8 +861,6 @@ static void task_clock_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } @@ -906,8 +904,6 @@ static void page_faults_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } @@ -954,8 +950,6 @@ static void context_switches_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } @@ -1000,8 +994,6 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter) atomic64_set(&counter->hw.prev_count, now); delta = now - prev; - if (WARN_ON_ONCE(delta < 0)) - delta = 0; atomic64_add(delta, &counter->count); } -- cgit v0.10.2 From 7995888fcb0246543ee8027bf2835a250ba8c925 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 08:54:56 +0100 Subject: perfcounters: tweak group scheduling Impact: schedule in groups atomically If there are multiple groups in a task, make sure they are scheduled in and out atomically. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index aab6c12..f8a4d9a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -367,21 +367,26 @@ counter_sched_in(struct perf_counter *counter, ctx->nr_active++; } -static void +static int group_sched_in(struct perf_counter *group_counter, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu) { struct perf_counter *counter; + int was_group = 0; counter_sched_in(group_counter, cpuctx, ctx, cpu); /* * Schedule in siblings as one group (if any): */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { counter_sched_in(counter, cpuctx, ctx, cpu); + was_group = 1; + } + + return was_group; } /* @@ -416,7 +421,12 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) if (counter->cpu != -1 && counter->cpu != cpu) continue; - group_sched_in(counter, cpuctx, ctx, cpu); + /* + * If we scheduled in a group atomically and + * exclusively, break out: + */ + if (group_sched_in(counter, cpuctx, ctx, cpu)) + break; } spin_unlock(&ctx->lock); -- cgit v0.10.2 From 5c167b8585c8d91206b395d57011ead7711e322f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:02:19 +0100 Subject: x86, perfcounters: rename intel_arch_perfmon.h => perf_counter.h Impact: rename include file We'll be providing an asm/perf_counter.h to the generic perfcounter code, so use the already existing x86 file for this purpose and rename it. Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h deleted file mode 100644 index 71598a9..0000000 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H -#define _ASM_X86_INTEL_ARCH_PERFMON_H - -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 - -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 - -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) - -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 - -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(int nmi); -#else -static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(int nmi) { } -#endif - -#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h new file mode 100644 index 0000000..9dadce1 --- /dev/null +++ b/arch/x86/include/asm/perf_counter.h @@ -0,0 +1,41 @@ +#ifndef _ASM_X86_PERF_COUNTER_H +#define _ASM_X86_PERF_COUNTER_H + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +#ifdef CONFIG_PERF_COUNTERS +extern void init_hw_perf_counters(void); +extern void perf_counters_lapic_init(int nmi); +#else +static inline void init_hw_perf_counters(void) { } +static inline void perf_counters_lapic_init(int nmi) { } +#endif + +#endif /* _ASM_X86_PERF_COUNTER_H */ diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0579ec1..4f859ac 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4461011..ad331b4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 89fad5d..a4a3a09 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include static bool perf_counters_initialized __read_mostly; diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 9abd48b..d6f5b9f 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -20,7 +20,7 @@ #include #include -#include +#include struct nmi_watchdog_ctlblk { unsigned int cccr_msr; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index e9f80c7..07c9145 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "op_x86_model.h" #include "op_counter.h" -- cgit v0.10.2 From eb2b861810d4ff72454c83996b891df4e0aaff9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 09:09:13 +0100 Subject: x86, perfcounters: prepare for fixed-mode PMCs Impact: refactor the x86 code for fixed-mode PMCs Extend the data structures and rename the existing facilities to allow for a 'generic' versus 'fixed' counter distinction. Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 9dadce1..dd5a4a5 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -1,6 +1,13 @@ #ifndef _ASM_X86_PERF_COUNTER_H #define _ASM_X86_PERF_COUNTER_H +/* + * Performance counter hw details: + */ + +#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_FIXED 3 + #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -20,6 +27,10 @@ #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 +/* + * Intel "Architectural Performance Monitoring" CPUID + * detection/enumeration details: + */ union cpuid10_eax { struct { unsigned int version_id:8; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a4a3a09..fc3af86 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -27,13 +27,12 @@ static bool perf_counters_initialized __read_mostly; static int nr_hw_counters __read_mostly; static u32 perf_counter_mask __read_mostly; -/* No support for fixed function counters yet */ - -#define MAX_HW_COUNTERS 8 - struct cpu_hw_counters { - struct perf_counter *counters[MAX_HW_COUNTERS]; - unsigned long used[BITS_TO_LONGS(MAX_HW_COUNTERS)]; + struct perf_counter *generic[X86_PMC_MAX_GENERIC]; + unsigned long used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)]; + + struct perf_counter *fixed[X86_PMC_MAX_FIXED]; + unsigned long used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)]; }; /* @@ -185,7 +184,7 @@ void hw_perf_restore(u64 ctrl) EXPORT_SYMBOL_GPL(hw_perf_restore); static inline void -__x86_perf_counter_disable(struct perf_counter *counter, +__pmc_generic_disable(struct perf_counter *counter, struct hw_perf_counter *hwc, unsigned int idx) { int err; @@ -193,7 +192,7 @@ __x86_perf_counter_disable(struct perf_counter *counter, err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); } -static DEFINE_PER_CPU(u64, prev_left[MAX_HW_COUNTERS]); +static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -231,7 +230,7 @@ __hw_perf_counter_set_period(struct perf_counter *counter, } static void -__x86_perf_counter_enable(struct perf_counter *counter, +__pmc_generic_enable(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { wrmsr(hwc->config_base + idx, @@ -241,7 +240,7 @@ __x86_perf_counter_enable(struct perf_counter *counter, /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ -static void x86_perf_counter_enable(struct perf_counter *counter) +static void pmc_generic_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -256,12 +255,12 @@ static void x86_perf_counter_enable(struct perf_counter *counter) perf_counters_lapic_init(hwc->nmi); - __x86_perf_counter_disable(counter, hwc, idx); + __pmc_generic_disable(counter, hwc, idx); - cpuc->counters[idx] = counter; + cpuc->generic[idx] = counter; __hw_perf_counter_set_period(counter, hwc, idx); - __x86_perf_counter_enable(counter, hwc, idx); + __pmc_generic_enable(counter, hwc, idx); } void perf_counter_print_debug(void) @@ -301,16 +300,16 @@ void perf_counter_print_debug(void) local_irq_enable(); } -static void x86_perf_counter_disable(struct perf_counter *counter) +static void pmc_generic_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __x86_perf_counter_disable(counter, hwc, idx); + __pmc_generic_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); - cpuc->counters[idx] = NULL; + cpuc->generic[idx] = NULL; /* * Drain the remaining delta count out of a counter @@ -349,7 +348,7 @@ static void perf_save_and_restart(struct perf_counter *counter) __hw_perf_counter_set_period(counter, hwc, idx); if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) - __x86_perf_counter_enable(counter, hwc, idx); + __pmc_generic_enable(counter, hwc, idx); } static void @@ -392,7 +391,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) again: ack = status; for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) { - struct perf_counter *counter = cpuc->counters[bit]; + struct perf_counter *counter = cpuc->generic[bit]; clear_bit(bit, (unsigned long *) &status); if (!counter) @@ -412,7 +411,7 @@ again: } /* * From NMI context we cannot call into the scheduler to - * do a task wakeup - but we mark these counters as + * do a task wakeup - but we mark these generic as * wakeup_pending and initate a wakeup callback: */ if (nmi) { @@ -462,7 +461,7 @@ void perf_counter_notify(struct pt_regs *regs) cpuc = &per_cpu(cpu_hw_counters, cpu); for_each_bit(bit, cpuc->used, nr_hw_counters) { - struct perf_counter *counter = cpuc->counters[bit]; + struct perf_counter *counter = cpuc->generic[bit]; if (!counter) continue; @@ -539,10 +538,10 @@ void __init init_hw_perf_counters(void) printk(KERN_INFO "... version: %d\n", eax.split.version_id); printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters); nr_hw_counters = eax.split.num_counters; - if (nr_hw_counters > MAX_HW_COUNTERS) { - nr_hw_counters = MAX_HW_COUNTERS; + if (nr_hw_counters > X86_PMC_MAX_GENERIC) { + nr_hw_counters = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - nr_hw_counters, MAX_HW_COUNTERS); + nr_hw_counters, X86_PMC_MAX_GENERIC); } perf_counter_mask = (1 << nr_hw_counters) - 1; perf_max_counters = nr_hw_counters; @@ -556,15 +555,15 @@ void __init init_hw_perf_counters(void) register_die_notifier(&perf_counter_nmi_notifier); } -static void x86_perf_counter_read(struct perf_counter *counter) +static void pmc_generic_read(struct perf_counter *counter) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); } static const struct hw_perf_counter_ops x86_perf_counter_ops = { - .hw_perf_counter_enable = x86_perf_counter_enable, - .hw_perf_counter_disable = x86_perf_counter_disable, - .hw_perf_counter_read = x86_perf_counter_read, + .hw_perf_counter_enable = pmc_generic_enable, + .hw_perf_counter_disable = pmc_generic_disable, + .hw_perf_counter_read = pmc_generic_read, }; const struct hw_perf_counter_ops * diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index d038450..984da54 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,6 +14,7 @@ #define _LINUX_PERF_COUNTER_H #include +#include #include #include -- cgit v0.10.2 From 703e937c83bbad79075a7846e062e447c2fee6a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 10:51:15 +0100 Subject: perfcounters: add fixed-mode PMC enumeration Enumerate fixed-mode PMCs based on CPUID, and feed that into the perfcounter code. Does not use fixed-mode PMCs yet. Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index dd5a4a5..945a315 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -41,6 +41,29 @@ union cpuid10_eax { unsigned int full; }; +union cpuid10_edx { + struct { + unsigned int num_counters_fixed:4; + unsigned int reserved:28; + } split; + unsigned int full; +}; + + +/* + * Fixed-purpose performance counters: + */ + +/* Instr_Retired.Any: */ +#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 + +/* CPU_CLK_Unhalted.Core: */ +#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a + +/* CPU_CLK_Unhalted.Ref: */ +#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index fc3af86..2fca50c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -27,6 +27,8 @@ static bool perf_counters_initialized __read_mostly; static int nr_hw_counters __read_mostly; static u32 perf_counter_mask __read_mostly; +static int nr_hw_counters_fixed __read_mostly; + struct cpu_hw_counters { struct perf_counter *generic[X86_PMC_MAX_GENERIC]; unsigned long used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)]; @@ -519,8 +521,9 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { void __init init_hw_perf_counters(void) { union cpuid10_eax eax; - unsigned int unused; unsigned int ebx; + unsigned int unused; + union cpuid10_edx edx; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return; @@ -529,14 +532,14 @@ void __init init_hw_perf_counters(void) * Check whether the Architectural PerfMon supports * Branch Misses Retired Event or not. */ - cpuid(10, &(eax.full), &ebx, &unused, &unused); + cpuid(10, &eax.full, &ebx, &unused, &edx.full); if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) return; printk(KERN_INFO "Intel Performance Monitoring support detected.\n"); - printk(KERN_INFO "... version: %d\n", eax.split.version_id); - printk(KERN_INFO "... num_counters: %d\n", eax.split.num_counters); + printk(KERN_INFO "... version: %d\n", eax.split.version_id); + printk(KERN_INFO "... num counters: %d\n", eax.split.num_counters); nr_hw_counters = eax.split.num_counters; if (nr_hw_counters > X86_PMC_MAX_GENERIC) { nr_hw_counters = X86_PMC_MAX_GENERIC; @@ -546,8 +549,16 @@ void __init init_hw_perf_counters(void) perf_counter_mask = (1 << nr_hw_counters) - 1; perf_max_counters = nr_hw_counters; - printk(KERN_INFO "... bit_width: %d\n", eax.split.bit_width); - printk(KERN_INFO "... mask_length: %d\n", eax.split.mask_length); + printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); + printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); + + nr_hw_counters_fixed = edx.split.num_counters_fixed; + if (nr_hw_counters_fixed > X86_PMC_MAX_FIXED) { + nr_hw_counters_fixed = X86_PMC_MAX_FIXED; + WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", + nr_hw_counters_fixed, X86_PMC_MAX_FIXED); + } + printk(KERN_INFO "... fixed counters: %d\n", nr_hw_counters_fixed); perf_counters_initialized = true; -- cgit v0.10.2 From 862a1a5f346fe7e9181ea51eaae48cf2cd70f746 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 13:09:20 +0100 Subject: x86, perfcounters: refactor code for fixed-function PMCs Impact: clean up Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 945a315..13745de 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -8,6 +8,10 @@ #define X86_PMC_MAX_GENERIC 8 #define X86_PMC_MAX_FIXED 3 +#define X86_PMC_IDX_GENERIC 0 +#define X86_PMC_IDX_FIXED 32 +#define X86_PMC_IDX_MAX 64 + #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -54,6 +58,15 @@ union cpuid10_edx { * Fixed-purpose performance counters: */ +/* + * All 3 fixed-mode PMCs are configured via this single MSR: + */ +#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d + +/* + * The counts are available in three separate MSRs: + */ + /* Instr_Retired.Any: */ #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 @@ -63,7 +76,6 @@ union cpuid10_edx { /* CPU_CLK_Unhalted.Ref: */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b - #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2fca50c..358af52 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -24,17 +24,14 @@ static bool perf_counters_initialized __read_mostly; /* * Number of (generic) HW counters: */ -static int nr_hw_counters __read_mostly; -static u32 perf_counter_mask __read_mostly; +static int nr_counters_generic __read_mostly; +static u64 perf_counter_mask __read_mostly; -static int nr_hw_counters_fixed __read_mostly; +static int nr_counters_fixed __read_mostly; struct cpu_hw_counters { - struct perf_counter *generic[X86_PMC_MAX_GENERIC]; - unsigned long used[BITS_TO_LONGS(X86_PMC_MAX_GENERIC)]; - - struct perf_counter *fixed[X86_PMC_MAX_FIXED]; - unsigned long used_fixed[BITS_TO_LONGS(X86_PMC_MAX_FIXED)]; + struct perf_counter *counters[X86_PMC_IDX_MAX]; + unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; }; /* @@ -159,7 +156,7 @@ void hw_perf_enable_all(void) if (unlikely(!perf_counters_initialized)) return; - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask); } u64 hw_perf_save_disable(void) @@ -170,7 +167,7 @@ u64 hw_perf_save_disable(void) return 0; rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); return ctrl; } @@ -181,7 +178,7 @@ void hw_perf_restore(u64 ctrl) if (unlikely(!perf_counters_initialized)) return; - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, ctrl, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); } EXPORT_SYMBOL_GPL(hw_perf_restore); @@ -239,6 +236,11 @@ __pmc_generic_enable(struct perf_counter *counter, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } +static int fixed_mode_idx(struct hw_perf_counter *hwc) +{ + return -1; +} + /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ @@ -250,7 +252,7 @@ static void pmc_generic_enable(struct perf_counter *counter) /* Try to get the previous counter again */ if (test_and_set_bit(idx, cpuc->used)) { - idx = find_first_zero_bit(cpuc->used, nr_hw_counters); + idx = find_first_zero_bit(cpuc->used, nr_counters_generic); set_bit(idx, cpuc->used); hwc->idx = idx; } @@ -259,7 +261,7 @@ static void pmc_generic_enable(struct perf_counter *counter) __pmc_generic_disable(counter, hwc, idx); - cpuc->generic[idx] = counter; + cpuc->counters[idx] = counter; __hw_perf_counter_set_period(counter, hwc, idx); __pmc_generic_enable(counter, hwc, idx); @@ -270,7 +272,7 @@ void perf_counter_print_debug(void) u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left; int cpu, idx; - if (!nr_hw_counters) + if (!nr_counters_generic) return; local_irq_disable(); @@ -286,7 +288,7 @@ void perf_counter_print_debug(void) printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); - for (idx = 0; idx < nr_hw_counters; idx++) { + for (idx = 0; idx < nr_counters_generic; idx++) { rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count); @@ -311,7 +313,7 @@ static void pmc_generic_disable(struct perf_counter *counter) __pmc_generic_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); - cpuc->generic[idx] = NULL; + cpuc->counters[idx] = NULL; /* * Drain the remaining delta count out of a counter @@ -381,7 +383,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); /* Disable counters globally */ - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); ack_APIC_irq(); cpuc = &per_cpu(cpu_hw_counters, cpu); @@ -392,8 +394,8 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) again: ack = status; - for_each_bit(bit, (unsigned long *) &status, nr_hw_counters) { - struct perf_counter *counter = cpuc->generic[bit]; + for_each_bit(bit, (unsigned long *) &status, nr_counters_generic) { + struct perf_counter *counter = cpuc->counters[bit]; clear_bit(bit, (unsigned long *) &status); if (!counter) @@ -424,7 +426,7 @@ again: } } - wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); /* * Repeat if there is more work to be done: @@ -436,7 +438,7 @@ out: /* * Restore - do not reenable when global enable is off: */ - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, saved_global, 0); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); } void smp_perf_counter_interrupt(struct pt_regs *regs) @@ -462,8 +464,8 @@ void perf_counter_notify(struct pt_regs *regs) cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); - for_each_bit(bit, cpuc->used, nr_hw_counters) { - struct perf_counter *counter = cpuc->generic[bit]; + for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) { + struct perf_counter *counter = cpuc->counters[bit]; if (!counter) continue; @@ -540,26 +542,29 @@ void __init init_hw_perf_counters(void) printk(KERN_INFO "... version: %d\n", eax.split.version_id); printk(KERN_INFO "... num counters: %d\n", eax.split.num_counters); - nr_hw_counters = eax.split.num_counters; - if (nr_hw_counters > X86_PMC_MAX_GENERIC) { - nr_hw_counters = X86_PMC_MAX_GENERIC; + nr_counters_generic = eax.split.num_counters; + if (nr_counters_generic > X86_PMC_MAX_GENERIC) { + nr_counters_generic = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - nr_hw_counters, X86_PMC_MAX_GENERIC); + nr_counters_generic, X86_PMC_MAX_GENERIC); } - perf_counter_mask = (1 << nr_hw_counters) - 1; - perf_max_counters = nr_hw_counters; + perf_counter_mask = (1 << nr_counters_generic) - 1; + perf_max_counters = nr_counters_generic; printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); - nr_hw_counters_fixed = edx.split.num_counters_fixed; - if (nr_hw_counters_fixed > X86_PMC_MAX_FIXED) { - nr_hw_counters_fixed = X86_PMC_MAX_FIXED; + nr_counters_fixed = edx.split.num_counters_fixed; + if (nr_counters_fixed > X86_PMC_MAX_FIXED) { + nr_counters_fixed = X86_PMC_MAX_FIXED; WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - nr_hw_counters_fixed, X86_PMC_MAX_FIXED); + nr_counters_fixed, X86_PMC_MAX_FIXED); } - printk(KERN_INFO "... fixed counters: %d\n", nr_hw_counters_fixed); + printk(KERN_INFO "... fixed counters: %d\n", nr_counters_fixed); + + perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED; + printk(KERN_INFO "... counter mask: %016Lx\n", perf_counter_mask); perf_counters_initialized = true; perf_counters_lapic_init(0); -- cgit v0.10.2 From 7671581f1666ef4b54a1c1e598c51ac44c060a9b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 14:20:28 +0100 Subject: perfcounters: hw ops rename Impact: rename field names Shorten them. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 358af52..b675571 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -577,9 +577,9 @@ static void pmc_generic_read(struct perf_counter *counter) } static const struct hw_perf_counter_ops x86_perf_counter_ops = { - .hw_perf_counter_enable = pmc_generic_enable, - .hw_perf_counter_disable = pmc_generic_disable, - .hw_perf_counter_read = pmc_generic_read, + .enable = pmc_generic_enable, + .disable = pmc_generic_disable, + .read = pmc_generic_read, }; const struct hw_perf_counter_ops * diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 984da54..48f76d2 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -128,9 +128,9 @@ struct perf_counter; * struct hw_perf_counter_ops - performance counter hw ops */ struct hw_perf_counter_ops { - void (*hw_perf_counter_enable) (struct perf_counter *counter); - void (*hw_perf_counter_disable) (struct perf_counter *counter); - void (*hw_perf_counter_read) (struct perf_counter *counter); + void (*enable) (struct perf_counter *counter); + void (*disable) (struct perf_counter *counter); + void (*read) (struct perf_counter *counter); }; /** diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f8a4d9a..961d651 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -109,7 +109,7 @@ static void __perf_counter_remove_from_context(void *info) spin_lock_irqsave(&ctx->lock, flags); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - counter->hw_ops->hw_perf_counter_disable(counter); + counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; ctx->nr_active--; cpuctx->active_oncpu--; @@ -226,7 +226,7 @@ static void __perf_install_in_context(void *info) counter->oncpu = cpu; ctx->nr_active++; cpuctx->active_oncpu++; - counter->hw_ops->hw_perf_counter_enable(counter); + counter->hw_ops->enable(counter); } if (!ctx->task && cpuctx->max_pertask) @@ -297,7 +297,7 @@ counter_sched_out(struct perf_counter *counter, if (counter->state != PERF_COUNTER_STATE_ACTIVE) return; - counter->hw_ops->hw_perf_counter_disable(counter); + counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; counter->oncpu = -1; @@ -327,7 +327,7 @@ group_sched_out(struct perf_counter *group_counter, * * We stop each counter and update the counter value in counter->count. * - * This does not protect us against NMI, but hw_perf_counter_disable() + * This does not protect us against NMI, but disable() * sets the disabled bit in the control field of counter _before_ * accessing the counter control register. If a NMI hits, then it will * not restart the counter. @@ -359,7 +359,7 @@ counter_sched_in(struct perf_counter *counter, if (counter->state == PERF_COUNTER_STATE_OFF) return; - counter->hw_ops->hw_perf_counter_enable(counter); + counter->hw_ops->enable(counter); counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ @@ -395,7 +395,7 @@ group_sched_in(struct perf_counter *group_counter, * * We restore the counter value and then enable it. * - * This does not protect us against NMI, but hw_perf_counter_enable() + * This does not protect us against NMI, but enable() * sets the enabled bit in the control field of counter _before_ * accessing the counter control register. If a NMI hits, then it will * keep the counter running. @@ -537,11 +537,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Cross CPU call to read the hardware counter */ -static void __hw_perf_counter_read(void *info) +static void __read(void *info) { struct perf_counter *counter = info; - counter->hw_ops->hw_perf_counter_read(counter); + counter->hw_ops->read(counter); } static u64 perf_counter_read(struct perf_counter *counter) @@ -552,7 +552,7 @@ static u64 perf_counter_read(struct perf_counter *counter) */ if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, - __hw_perf_counter_read, counter, 1); + __read, counter, 1); } return atomic64_read(&counter->count); @@ -855,9 +855,9 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_cpu_clock = { - .hw_perf_counter_enable = cpu_clock_perf_counter_enable, - .hw_perf_counter_disable = cpu_clock_perf_counter_disable, - .hw_perf_counter_read = cpu_clock_perf_counter_read, + .enable = cpu_clock_perf_counter_enable, + .disable = cpu_clock_perf_counter_disable, + .read = cpu_clock_perf_counter_read, }; static void task_clock_perf_counter_update(struct perf_counter *counter) @@ -891,9 +891,9 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_task_clock = { - .hw_perf_counter_enable = task_clock_perf_counter_enable, - .hw_perf_counter_disable = task_clock_perf_counter_disable, - .hw_perf_counter_read = task_clock_perf_counter_read, + .enable = task_clock_perf_counter_enable, + .disable = task_clock_perf_counter_disable, + .read = task_clock_perf_counter_read, }; static u64 get_page_faults(void) @@ -937,9 +937,9 @@ static void page_faults_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_page_faults = { - .hw_perf_counter_enable = page_faults_perf_counter_enable, - .hw_perf_counter_disable = page_faults_perf_counter_disable, - .hw_perf_counter_read = page_faults_perf_counter_read, + .enable = page_faults_perf_counter_enable, + .disable = page_faults_perf_counter_disable, + .read = page_faults_perf_counter_read, }; static u64 get_context_switches(void) @@ -983,9 +983,9 @@ static void context_switches_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_context_switches = { - .hw_perf_counter_enable = context_switches_perf_counter_enable, - .hw_perf_counter_disable = context_switches_perf_counter_disable, - .hw_perf_counter_read = context_switches_perf_counter_read, + .enable = context_switches_perf_counter_enable, + .disable = context_switches_perf_counter_disable, + .read = context_switches_perf_counter_read, }; static inline u64 get_cpu_migrations(void) @@ -1027,9 +1027,9 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) } static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { - .hw_perf_counter_enable = cpu_migrations_perf_counter_enable, - .hw_perf_counter_disable = cpu_migrations_perf_counter_disable, - .hw_perf_counter_read = cpu_migrations_perf_counter_read, + .enable = cpu_migrations_perf_counter_enable, + .disable = cpu_migrations_perf_counter_disable, + .read = cpu_migrations_perf_counter_read, }; static const struct hw_perf_counter_ops * @@ -1283,7 +1283,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); - child_counter->hw_ops->hw_perf_counter_disable(child_counter); + child_counter->hw_ops->disable(child_counter); child_counter->state = PERF_COUNTER_STATE_INACTIVE; child_counter->oncpu = -1; -- cgit v0.10.2 From aa9c4c0f967fdb482ea95e8473ec3d201e6e0781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 17 Dec 2008 14:10:57 +0100 Subject: perfcounters: fix task clock counter Impact: fix per task clock counter precision Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4a145ca..1b2e324 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -66,7 +66,15 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } + +/* + * Lock/unlock the current runqueue - to extract task statistics: + */ +extern void curr_rq_lock_irq_save(unsigned long *flags); +extern void curr_rq_unlock_irq_restore(unsigned long *flags); +extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); + extern void account_user_time(struct task_struct *, cputime_t); extern void account_user_time_scaled(struct task_struct *, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t); diff --git a/kernel/exit.c b/kernel/exit.c index d336c90..244edfd 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -922,6 +922,12 @@ static void exit_notify(struct task_struct *tsk, int group_dead) forget_original_parent(tsk); exit_task_namespaces(tsk); + /* + * Flush inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + */ + perf_counter_exit_task(tsk); + write_lock_irq(&tasklist_lock); if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); @@ -1093,11 +1099,6 @@ NORET_TYPE void do_exit(long code) mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; #endif - /* - * These must happen late, after the PID is not - * hashed anymore, but still at a point that may sleep: - */ - perf_counter_exit_task(tsk); #ifdef CONFIG_FUTEX if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); @@ -1121,6 +1122,12 @@ NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); + /* + * These must happen late, after the PID is not + * hashed anymore, but still at a point that may sleep: + */ + perf_counter_exit_task(tsk); + preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 961d651..f1110ac 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* @@ -106,7 +107,8 @@ static void __perf_counter_remove_from_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock_irqsave(&ctx->lock, flags); + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { counter->hw_ops->disable(counter); @@ -135,7 +137,8 @@ static void __perf_counter_remove_from_context(void *info) perf_max_counters - perf_reserved_percpu); } - spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); } @@ -209,7 +212,8 @@ static void __perf_install_in_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock_irqsave(&ctx->lock, flags); + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); /* * Protect the list operation against NMI by disabling the @@ -232,7 +236,8 @@ static void __perf_install_in_context(void *info) if (!ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; - spin_unlock_irqrestore(&ctx->lock, flags); + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); } /* @@ -438,15 +443,19 @@ int perf_counter_task_disable(void) struct task_struct *curr = current; struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; + unsigned long flags; u64 perf_flags; int cpu; if (likely(!ctx->nr_counters)) return 0; - local_irq_disable(); + curr_rq_lock_irq_save(&flags); cpu = smp_processor_id(); + /* force the update of the task clock: */ + __task_delta_exec(curr, 1); + perf_counter_task_sched_out(curr, cpu); spin_lock(&ctx->lock); @@ -463,7 +472,7 @@ int perf_counter_task_disable(void) spin_unlock(&ctx->lock); - local_irq_enable(); + curr_rq_unlock_irq_restore(&flags); return 0; } @@ -473,15 +482,19 @@ int perf_counter_task_enable(void) struct task_struct *curr = current; struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; + unsigned long flags; u64 perf_flags; int cpu; if (likely(!ctx->nr_counters)) return 0; - local_irq_disable(); + curr_rq_lock_irq_save(&flags); cpu = smp_processor_id(); + /* force the update of the task clock: */ + __task_delta_exec(curr, 1); + spin_lock(&ctx->lock); /* @@ -493,6 +506,7 @@ int perf_counter_task_enable(void) if (counter->state != PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_event.disabled = 0; } hw_perf_restore(perf_flags); @@ -500,7 +514,7 @@ int perf_counter_task_enable(void) perf_counter_task_sched_in(curr, cpu); - local_irq_enable(); + curr_rq_unlock_irq_restore(&flags); return 0; } @@ -540,8 +554,11 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) static void __read(void *info) { struct perf_counter *counter = info; + unsigned long flags; + curr_rq_lock_irq_save(&flags); counter->hw_ops->read(counter); + curr_rq_unlock_irq_restore(&flags); } static u64 perf_counter_read(struct perf_counter *counter) @@ -860,13 +877,27 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { .read = cpu_clock_perf_counter_read, }; -static void task_clock_perf_counter_update(struct perf_counter *counter) +/* + * Called from within the scheduler: + */ +static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update) { - u64 prev, now; + struct task_struct *curr = counter->task; + u64 delta; + + WARN_ON_ONCE(counter->task != current); + + delta = __task_delta_exec(curr, update); + + return curr->se.sum_exec_runtime + delta; +} + +static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) +{ + u64 prev; s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = current->se.sum_exec_runtime; atomic64_set(&counter->hw.prev_count, now); @@ -877,17 +908,23 @@ static void task_clock_perf_counter_update(struct perf_counter *counter) static void task_clock_perf_counter_read(struct perf_counter *counter) { - task_clock_perf_counter_update(counter); + u64 now = task_clock_perf_counter_val(counter, 1); + + task_clock_perf_counter_update(counter, now); } static void task_clock_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, current->se.sum_exec_runtime); + u64 now = task_clock_perf_counter_val(counter, 0); + + atomic64_set(&counter->hw.prev_count, now); } static void task_clock_perf_counter_disable(struct perf_counter *counter) { - task_clock_perf_counter_update(counter); + u64 now = task_clock_perf_counter_val(counter, 0); + + task_clock_perf_counter_update(counter, now); } static const struct hw_perf_counter_ops perf_ops_task_clock = { @@ -1267,6 +1304,7 @@ __perf_counter_exit_task(struct task_struct *child, { struct perf_counter *parent_counter; u64 parent_val, child_val; + unsigned long flags; u64 perf_flags; /* @@ -1275,7 +1313,7 @@ __perf_counter_exit_task(struct task_struct *child, * Be careful about zapping the list - IRQ/NMI context * could still be processing it: */ - local_irq_disable(); + curr_rq_lock_irq_save(&flags); perf_flags = hw_perf_save_disable(); if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { @@ -1294,7 +1332,7 @@ __perf_counter_exit_task(struct task_struct *child, list_del_init(&child_counter->list_entry); hw_perf_restore(perf_flags); - local_irq_enable(); + curr_rq_unlock_irq_restore(&flags); parent_counter = child_counter->parent; /* diff --git a/kernel/sched.c b/kernel/sched.c index 382cfdb..4d84ff4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -638,7 +638,7 @@ static inline int cpu_of(struct rq *rq) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -static inline void update_rq_clock(struct rq *rq) +inline void update_rq_clock(struct rq *rq) { rq->clock = sched_clock_cpu(cpu_of(rq)); } @@ -969,6 +969,26 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) } } +void curr_rq_lock_irq_save(unsigned long *flags) + __acquires(rq->lock) +{ + struct rq *rq; + + local_irq_save(*flags); + rq = cpu_rq(smp_processor_id()); + spin_lock(&rq->lock); +} + +void curr_rq_unlock_irq_restore(unsigned long *flags) + __releases(rq->lock) +{ + struct rq *rq; + + rq = cpu_rq(smp_processor_id()); + spin_unlock(&rq->lock); + local_irq_restore(*flags); +} + void task_rq_unlock_wait(struct task_struct *p) { struct rq *rq = task_rq(p); @@ -2558,7 +2578,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { fire_sched_out_preempt_notifiers(prev, next); - perf_counter_task_sched_out(prev, cpu_of(rq)); prepare_lock_switch(rq, next); prepare_arch_switch(next); } @@ -4093,6 +4112,29 @@ EXPORT_PER_CPU_SYMBOL(kstat); * Return any ns on the sched_clock that have not yet been banked in * @p in case that task is currently running. */ +unsigned long long __task_delta_exec(struct task_struct *p, int update) +{ + s64 delta_exec; + struct rq *rq; + + rq = task_rq(p); + WARN_ON_ONCE(!runqueue_is_locked()); + WARN_ON_ONCE(!task_current(rq, p)); + + if (update) + update_rq_clock(rq); + + delta_exec = rq->clock - p->se.exec_start; + + WARN_ON_ONCE(delta_exec < 0); + + return delta_exec; +} + +/* + * Return any ns on the sched_clock that have not yet been banked in + * @p in case that task is currently running. + */ unsigned long long task_delta_exec(struct task_struct *p) { unsigned long flags; @@ -4316,13 +4358,13 @@ void scheduler_tick(void) update_rq_clock(rq); update_cpu_load(rq); curr->sched_class->task_tick(rq, curr, 0); + perf_counter_task_tick(curr, cpu); spin_unlock(&rq->lock); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); #endif - perf_counter_task_tick(curr, cpu); } #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ @@ -4512,6 +4554,7 @@ need_resched_nonpreemptible: if (likely(prev != next)) { sched_info_switch(prev, next); + perf_counter_task_sched_out(prev, cpu); rq->nr_switches++; rq->curr = next; -- cgit v0.10.2 From eef6cbf5844c620d9db9be99e4908cdf92492fb9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 19 Dec 2008 10:20:42 +0100 Subject: perfcounters: pull inherited counters Change counter inheritance from a 'push' to a 'pull' model: instead of child tasks pushing their final counts to the parent, reuse the wait4 infrastructure to pull counters as child tasks are exit-processed, much like how cutime/cstime is collected. Signed-off-by: Ingo Molnar diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd890..54fa2fa 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,14 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +#ifdef CONFIG_PERF_COUNTERS +# define INIT_PERF_COUNTERS(tsk) \ + .perf_counter_ctx.counter_list = \ + LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), +#else +# define INIT_PERF_COUNTERS(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -180,6 +188,7 @@ extern struct group_info init_groups; INIT_IDS \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ + INIT_PERF_COUNTERS(tsk) \ } diff --git a/kernel/exit.c b/kernel/exit.c index 244edfd..101b7ee 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -153,6 +153,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); +#ifdef CONFIG_PERF_COUNTERS + WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); +#endif trace_sched_process_free(tsk); put_task_struct(tsk); } @@ -922,12 +925,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead) forget_original_parent(tsk); exit_task_namespaces(tsk); - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. - */ - perf_counter_exit_task(tsk); - write_lock_irq(&tasklist_lock); if (group_dead) kill_orphaned_pgrp(tsk->group_leader, NULL); @@ -1122,12 +1119,6 @@ NORET_TYPE void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); - /* - * These must happen late, after the PID is not - * hashed anymore, but still at a point that may sleep: - */ - perf_counter_exit_task(tsk); - preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; @@ -1371,6 +1362,12 @@ static int wait_task_zombie(struct task_struct *p, int options, */ read_unlock(&tasklist_lock); + /* + * Flush inherited counters to the parent - before the parent + * gets woken up by child-exit notifications. + */ + perf_counter_exit_task(p); + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; -- cgit v0.10.2 From 78b6084c907cea15bb40a564b974e072f5163781 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 15:07:49 +0100 Subject: perfcounters: fix init context lock Signed-off-by: Ingo Molnar diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 54fa2fa..467cff5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -116,7 +116,9 @@ extern struct group_info init_groups; #ifdef CONFIG_PERF_COUNTERS # define INIT_PERF_COUNTERS(tsk) \ .perf_counter_ctx.counter_list = \ - LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), + LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ + .perf_counter_ctx.lock = \ + __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), #else # define INIT_PERF_COUNTERS(tsk) #endif -- cgit v0.10.2 From 95cdd2e7851cce79ab839cb0b3cbe68d7911d0f1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 13:50:42 +0100 Subject: perfcounters: enable lowlevel pmc code to schedule counters Allow lowlevel ->enable() op to return an error if a counter can not be added. This can be used to handle counter constraints. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index b675571..74090a3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -244,7 +244,7 @@ static int fixed_mode_idx(struct hw_perf_counter *hwc) /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ -static void pmc_generic_enable(struct perf_counter *counter) +static int pmc_generic_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -253,6 +253,8 @@ static void pmc_generic_enable(struct perf_counter *counter) /* Try to get the previous counter again */ if (test_and_set_bit(idx, cpuc->used)) { idx = find_first_zero_bit(cpuc->used, nr_counters_generic); + if (idx == nr_counters_generic) + return -EAGAIN; set_bit(idx, cpuc->used); hwc->idx = idx; } @@ -265,6 +267,8 @@ static void pmc_generic_enable(struct perf_counter *counter) __hw_perf_counter_set_period(counter, hwc, idx); __pmc_generic_enable(counter, hwc, idx); + + return 0; } void perf_counter_print_debug(void) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 48f76d2..53af11d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -128,7 +128,7 @@ struct perf_counter; * struct hw_perf_counter_ops - performance counter hw ops */ struct hw_perf_counter_ops { - void (*enable) (struct perf_counter *counter); + int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f1110ac..2e73929 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -355,21 +355,25 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) cpuctx->task_ctx = NULL; } -static void +static int counter_sched_in(struct perf_counter *counter, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu) { if (counter->state == PERF_COUNTER_STATE_OFF) - return; + return 0; + + if (counter->hw_ops->enable(counter)) + return -EAGAIN; - counter->hw_ops->enable(counter); counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ cpuctx->active_oncpu++; ctx->nr_active++; + + return 0; } static int @@ -378,20 +382,38 @@ group_sched_in(struct perf_counter *group_counter, struct perf_counter_context *ctx, int cpu) { - struct perf_counter *counter; - int was_group = 0; + struct perf_counter *counter, *partial_group; + int ret = 0; - counter_sched_in(group_counter, cpuctx, ctx, cpu); + if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) + return -EAGAIN; /* * Schedule in siblings as one group (if any): */ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { - counter_sched_in(counter, cpuctx, ctx, cpu); - was_group = 1; + if (counter_sched_in(counter, cpuctx, ctx, cpu)) { + partial_group = counter; + goto group_error; + } + ret = -EAGAIN; } - return was_group; + return ret; + +group_error: + /* + * Groups can be scheduled in as one unit only, so undo any + * partial group before returning: + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { + if (counter == partial_group) + break; + counter_sched_out(counter, cpuctx, ctx); + } + counter_sched_out(group_counter, cpuctx, ctx); + + return -EAGAIN; } /* @@ -416,9 +438,6 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) spin_lock(&ctx->lock); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (ctx->nr_active == cpuctx->max_pertask) - break; - /* * Listen to the 'cpu' scheduling filter constraint * of counters: @@ -856,8 +875,9 @@ static const struct file_operations perf_fops = { .poll = perf_poll, }; -static void cpu_clock_perf_counter_enable(struct perf_counter *counter) +static int cpu_clock_perf_counter_enable(struct perf_counter *counter) { + return 0; } static void cpu_clock_perf_counter_disable(struct perf_counter *counter) @@ -913,11 +933,13 @@ static void task_clock_perf_counter_read(struct perf_counter *counter) task_clock_perf_counter_update(counter, now); } -static void task_clock_perf_counter_enable(struct perf_counter *counter) +static int task_clock_perf_counter_enable(struct perf_counter *counter) { u64 now = task_clock_perf_counter_val(counter, 0); atomic64_set(&counter->hw.prev_count, now); + + return 0; } static void task_clock_perf_counter_disable(struct perf_counter *counter) @@ -960,12 +982,14 @@ static void page_faults_perf_counter_read(struct perf_counter *counter) page_faults_perf_counter_update(counter); } -static void page_faults_perf_counter_enable(struct perf_counter *counter) +static int page_faults_perf_counter_enable(struct perf_counter *counter) { /* * page-faults is a per-task value already, * so we dont have to clear it on switch-in. */ + + return 0; } static void page_faults_perf_counter_disable(struct perf_counter *counter) @@ -1006,12 +1030,14 @@ static void context_switches_perf_counter_read(struct perf_counter *counter) context_switches_perf_counter_update(counter); } -static void context_switches_perf_counter_enable(struct perf_counter *counter) +static int context_switches_perf_counter_enable(struct perf_counter *counter) { /* * ->nvcsw + curr->nivcsw is a per-task value already, * so we dont have to clear it on switch-in. */ + + return 0; } static void context_switches_perf_counter_disable(struct perf_counter *counter) @@ -1050,12 +1076,14 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter) cpu_migrations_perf_counter_update(counter); } -static void cpu_migrations_perf_counter_enable(struct perf_counter *counter) +static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) { /* * se.nr_migrations is a per-task value already, * so we dont have to clear it on switch-in. */ + + return 0; } static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) -- cgit v0.10.2 From 0dff86aa7b9ec65a6d07167b7afb050b5fc98ddc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:28:12 +0100 Subject: x86, perfcounters: print out the ->used bitmask Impact: extend debug printouts Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 74090a3..f3359c2 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -255,6 +255,7 @@ static int pmc_generic_enable(struct perf_counter *counter) idx = find_first_zero_bit(cpuc->used, nr_counters_generic); if (idx == nr_counters_generic) return -EAGAIN; + set_bit(idx, cpuc->used); hwc->idx = idx; } @@ -274,6 +275,7 @@ static int pmc_generic_enable(struct perf_counter *counter) void perf_counter_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left; + struct cpu_hw_counters *cpuc; int cpu, idx; if (!nr_counters_generic) @@ -282,6 +284,7 @@ void perf_counter_print_debug(void) local_irq_disable(); cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_counters, cpu); rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); @@ -291,6 +294,7 @@ void perf_counter_print_debug(void) printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl); printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); + printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); -- cgit v0.10.2 From 8fe91e61cdc407c7556d3cd71cf20141a25bbcea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:29:25 +0100 Subject: perfcounters: remove ->nr_inherited Impact: remove dead code nr_inherited was not maintained correctly (not decremented) - and also not used - remove it. Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 53af11d..1ea08e9 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -164,7 +164,6 @@ struct perf_counter { struct task_struct *task; struct file *filp; - unsigned int nr_inherited; struct perf_counter *parent; /* * Protect attach/detach: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2e73929..48e1dbc 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1308,7 +1308,6 @@ inherit_counter(struct perf_counter *parent_counter, child_ctx->nr_counters++; child_counter->parent = parent_counter; - parent_counter->nr_inherited++; /* * inherit into child's child as well: */ -- cgit v0.10.2 From 235c7fc7c500e4fd1700c4ad01b5612bcdc1b449 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 21 Dec 2008 14:43:25 +0100 Subject: perfcounters: generalize the counter scheduler Impact: clean up and refactor code refactor the counter scheduler: separate out in/out functions and introduce a counter-rotation function as well. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 48e1dbc..d7a79f3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info) spin_lock(&ctx->lock); if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_ops->disable(counter); ctx->nr_active--; cpuctx->active_oncpu--; counter->task = NULL; + counter->oncpu = -1; } ctx->nr_counters--; @@ -192,8 +193,36 @@ retry: spin_unlock_irq(&ctx->lock); } +static int +counter_sched_in(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, + int cpu) +{ + if (counter->state == PERF_COUNTER_STATE_OFF) + return 0; + + counter->state = PERF_COUNTER_STATE_ACTIVE; + counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ + /* + * The new state must be visible before we turn it on in the hardware: + */ + smp_wmb(); + + if (counter->hw_ops->enable(counter)) { + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->oncpu = -1; + return -EAGAIN; + } + + cpuctx->active_oncpu++; + ctx->nr_active++; + + return 0; +} + /* - * Cross CPU call to install and enable a preformance counter + * Cross CPU call to install and enable a performance counter */ static void __perf_install_in_context(void *info) { @@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info) * counters on a global level. NOP for non NMI based counters. */ perf_flags = hw_perf_save_disable(); - list_add_counter(counter, ctx); - hw_perf_restore(perf_flags); + list_add_counter(counter, ctx); ctx->nr_counters++; - if (cpuctx->active_oncpu < perf_max_counters) { - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; - ctx->nr_active++; - cpuctx->active_oncpu++; - counter->hw_ops->enable(counter); - } + counter_sched_in(counter, cpuctx, ctx, cpu); if (!ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; + hw_perf_restore(perf_flags); + spin_unlock(&ctx->lock); curr_rq_unlock_irq_restore(&flags); } @@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter, if (counter->state != PERF_COUNTER_STATE_ACTIVE) return; - counter->hw_ops->disable(counter); counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_ops->disable(counter); counter->oncpu = -1; cpuctx->active_oncpu--; @@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter, counter_sched_out(counter, cpuctx, ctx); } +void __perf_counter_sched_out(struct perf_counter_context *ctx, + struct perf_cpu_context *cpuctx) +{ + struct perf_counter *counter; + + if (likely(!ctx->nr_counters)) + return; + + spin_lock(&ctx->lock); + if (ctx->nr_active) { + list_for_each_entry(counter, &ctx->counter_list, list_entry) + group_sched_out(counter, cpuctx, ctx); + } + spin_unlock(&ctx->lock); +} + /* * Called from scheduler to remove the counters of the current task, * with interrupts disabled. @@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct perf_counter_context *ctx = &task->perf_counter_ctx; - struct perf_counter *counter; if (likely(!cpuctx->task_ctx)) return; - spin_lock(&ctx->lock); - if (ctx->nr_active) { - list_for_each_entry(counter, &ctx->counter_list, list_entry) - group_sched_out(counter, cpuctx, ctx); - } - spin_unlock(&ctx->lock); + __perf_counter_sched_out(ctx, cpuctx); + cpuctx->task_ctx = NULL; } -static int -counter_sched_in(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, - int cpu) +static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) { - if (counter->state == PERF_COUNTER_STATE_OFF) - return 0; - - if (counter->hw_ops->enable(counter)) - return -EAGAIN; - - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ - - cpuctx->active_oncpu++; - ctx->nr_active++; - - return 0; + __perf_counter_sched_out(&cpuctx->ctx, cpuctx); } static int @@ -416,21 +435,10 @@ group_error: return -EAGAIN; } -/* - * Called from scheduler to add the counters of the current task - * with interrupts disabled. - * - * We restore the counter value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * keep the counter running. - */ -void perf_counter_task_sched_in(struct task_struct *task, int cpu) +static void +__perf_counter_sched_in(struct perf_counter_context *ctx, + struct perf_cpu_context *cpuctx, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &task->perf_counter_ctx; struct perf_counter *counter; if (likely(!ctx->nr_counters)) @@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu) break; } spin_unlock(&ctx->lock); +} +/* + * Called from scheduler to add the counters of the current task + * with interrupts disabled. + * + * We restore the counter value and then enable it. + * + * This does not protect us against NMI, but enable() + * sets the enabled bit in the control field of counter _before_ + * accessing the counter control register. If a NMI hits, then it will + * keep the counter running. + */ +void perf_counter_task_sched_in(struct task_struct *task, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &task->perf_counter_ctx; + + __perf_counter_sched_in(ctx, cpuctx, cpu); cpuctx->task_ctx = ctx; } +static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) +{ + struct perf_counter_context *ctx = &cpuctx->ctx; + + __perf_counter_sched_in(ctx, cpuctx, cpu); +} + int perf_counter_task_disable(void) { struct task_struct *curr = current; @@ -514,6 +547,8 @@ int perf_counter_task_enable(void) /* force the update of the task clock: */ __task_delta_exec(curr, 1); + perf_counter_task_sched_out(curr, cpu); + spin_lock(&ctx->lock); /* @@ -538,19 +573,18 @@ int perf_counter_task_enable(void) return 0; } -void perf_counter_task_tick(struct task_struct *curr, int cpu) +/* + * Round-robin a context's counters: + */ +static void rotate_ctx(struct perf_counter_context *ctx) { - struct perf_counter_context *ctx = &curr->perf_counter_ctx; struct perf_counter *counter; u64 perf_flags; - if (likely(!ctx->nr_counters)) + if (!ctx->nr_counters) return; - perf_counter_task_sched_out(curr, cpu); - spin_lock(&ctx->lock); - /* * Rotate the first entry last (works just fine for group counters too): */ @@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); +} + +void perf_counter_task_tick(struct task_struct *curr, int cpu) +{ + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &curr->perf_counter_ctx; + const int rotate_percpu = 0; + + if (rotate_percpu) + perf_counter_cpu_sched_out(cpuctx); + perf_counter_task_sched_out(curr, cpu); + if (rotate_percpu) + rotate_ctx(&cpuctx->ctx); + rotate_ctx(ctx); + + if (rotate_percpu) + perf_counter_cpu_sched_in(cpuctx, cpu); perf_counter_task_sched_in(curr, cpu); } @@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update) struct task_struct *curr = counter->task; u64 delta; - WARN_ON_ONCE(counter->task != current); - delta = __task_delta_exec(curr, update); return curr->se.sum_exec_runtime + delta; @@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->group_leader = group_leader; counter->hw_ops = NULL; + counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) counter->state = PERF_COUNTER_STATE_OFF; @@ -1331,35 +1381,49 @@ __perf_counter_exit_task(struct task_struct *child, { struct perf_counter *parent_counter; u64 parent_val, child_val; - unsigned long flags; - u64 perf_flags; /* - * Disable and unlink this counter. - * - * Be careful about zapping the list - IRQ/NMI context - * could still be processing it: + * If we do not self-reap then we have to wait for the + * child task to unschedule (it will happen for sure), + * so that its counter is at its final count. (This + * condition triggers rarely - child tasks usually get + * off their CPU before the parent has a chance to + * get this far into the reaping action) */ - curr_rq_lock_irq_save(&flags); - perf_flags = hw_perf_save_disable(); - - if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { + if (child != current) { + wait_task_inactive(child, 0); + list_del_init(&child_counter->list_entry); + } else { struct perf_cpu_context *cpuctx; + unsigned long flags; + u64 perf_flags; + + /* + * Disable and unlink this counter. + * + * Be careful about zapping the list - IRQ/NMI context + * could still be processing it: + */ + curr_rq_lock_irq_save(&flags); + perf_flags = hw_perf_save_disable(); cpuctx = &__get_cpu_var(perf_cpu_context); - child_counter->hw_ops->disable(child_counter); - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - child_counter->oncpu = -1; + if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { + child_counter->state = PERF_COUNTER_STATE_INACTIVE; + child_counter->hw_ops->disable(child_counter); + cpuctx->active_oncpu--; + child_ctx->nr_active--; + child_counter->oncpu = -1; + } - cpuctx->active_oncpu--; - child_ctx->nr_active--; - } + list_del_init(&child_counter->list_entry); - list_del_init(&child_counter->list_entry); + child_ctx->nr_counters--; - hw_perf_restore(perf_flags); - curr_rq_unlock_irq_restore(&flags); + hw_perf_restore(perf_flags); + curr_rq_unlock_irq_restore(&flags); + } parent_counter = child_counter->parent; /* -- cgit v0.10.2 From f650a672359819454c3d8d4135ecd1558cde0b24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Dec 2008 12:17:29 +0100 Subject: perfcounters: add PERF_COUNT_BUS_CYCLES Generalize "bus cycles" hw events - and map them to CPU_CLK_Unhalted.Ref on x86. (which is a good enough approximation) Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f3359c2..86b2fdd 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -41,12 +41,13 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); static const int intel_perfmon_event_map[] = { - [PERF_COUNT_CYCLES] = 0x003c, + [PERF_COUNT_CPU_CYCLES] = 0x003c, [PERF_COUNT_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e, [PERF_COUNT_CACHE_MISSES] = 0x412e, [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_BUS_CYCLES] = 0x013c, }; static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1ea08e9..ec77d16 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -36,14 +36,15 @@ enum hw_event_types { /* * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CYCLES = 0, + PERF_COUNT_CPU_CYCLES = 0, PERF_COUNT_INSTRUCTIONS = 1, PERF_COUNT_CACHE_REFERENCES = 2, PERF_COUNT_CACHE_MISSES = 3, PERF_COUNT_BRANCH_INSTRUCTIONS = 4, PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 6, + PERF_HW_EVENTS_MAX = 7, /* * Special "software" counters provided by the kernel, even if -- cgit v0.10.2 From 2f18d1e8d07ae67dd0afce875287756d4bd31a46 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 22 Dec 2008 11:10:42 +0100 Subject: x86, perfcounters: add support for fixed-function pmcs Impact: extend performance counter support on x86 Intel CPUs Modern Intel CPUs have 3 "fixed-function" performance counters, which count these hardware events: Instr_Retired.Any CPU_CLK_Unhalted.Core CPU_CLK_Unhalted.Ref Add support for them to the performance counters subsystem. Their use is transparent to user-space: the counter scheduler is extended to automatically recognize the cases where a fixed-function PMC can be utilized instead of a generic PMC. In such cases the generic PMC is kept available for more counters. The above fixed-function events map to these generic counter hw events: PERF_COUNT_INSTRUCTIONS PERF_COUNT_CPU_CYCLES PERF_COUNT_BUS_CYCLES (The 'bus' cycles are in reality often CPU-ish cycles, just with a fixed frequency.) Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 13745de..2e08ed7 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -23,6 +23,11 @@ #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) +/* + * Includes eventsel and unit mask as well: + */ +#define ARCH_PERFMON_EVENT_MASK 0xffff + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 @@ -69,12 +74,15 @@ union cpuid10_edx { /* Instr_Retired.Any: */ #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 +#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) /* CPU_CLK_Unhalted.Core: */ #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a +#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) /* CPU_CLK_Unhalted.Ref: */ #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b +#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 86b2fdd..da46eca 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -26,6 +26,7 @@ static bool perf_counters_initialized __read_mostly; */ static int nr_counters_generic __read_mostly; static u64 perf_counter_mask __read_mostly; +static u64 counter_value_mask __read_mostly; static int nr_counters_fixed __read_mostly; @@ -120,9 +121,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->nmi = 1; } - hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; - hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; - hwc->irq_period = hw_event->irq_period; /* * Intel PMCs cannot be accessed sanely above 32 bit width, @@ -184,15 +182,33 @@ void hw_perf_restore(u64 ctrl) EXPORT_SYMBOL_GPL(hw_perf_restore); static inline void +__pmc_fixed_disable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, mask; + int err; + + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + err = checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static inline void __pmc_generic_disable(struct perf_counter *counter, struct hw_perf_counter *hwc, unsigned int idx) { int err; + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) + return __pmc_fixed_disable(counter, hwc, idx); + err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); } -static DEFINE_PER_CPU(u64, prev_left[X86_PMC_MAX_GENERIC]); +static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); /* * Set the next IRQ period, based on the hwc->period_left value. @@ -202,8 +218,9 @@ static void __hw_perf_counter_set_period(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { - s32 left = atomic64_read(&hwc->period_left); + s64 left = atomic64_read(&hwc->period_left); s32 period = hwc->irq_period; + int err; /* * If we are way outside a reasoable range then just skip forward: @@ -224,21 +241,64 @@ __hw_perf_counter_set_period(struct perf_counter *counter, * The hw counter starts counting from this counter offset, * mark it to be able to extra future deltas: */ - atomic64_set(&hwc->prev_count, (u64)(s64)-left); + atomic64_set(&hwc->prev_count, (u64)-left); - wrmsr(hwc->counter_base + idx, -left, 0); + err = checking_wrmsrl(hwc->counter_base + idx, + (u64)(-left) & counter_value_mask); +} + +static inline void +__pmc_fixed_enable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, bits, mask; + int err; + + /* + * Enable IRQ generation (0x8) and ring-3 counting (0x2), + * and enable ring-0 counting if allowed: + */ + bits = 0x8ULL | 0x2ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + ctrl_val |= bits; + err = checking_wrmsrl(hwc->config_base, ctrl_val); } static void __pmc_generic_enable(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) + return __pmc_fixed_enable(counter, hwc, idx); + wrmsr(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } -static int fixed_mode_idx(struct hw_perf_counter *hwc) +static int +fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { + unsigned int event; + + if (unlikely(hwc->nmi)) + return -1; + + event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_INSTRUCTIONS])) + return X86_PMC_IDX_FIXED_INSTRUCTIONS; + if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_CPU_CYCLES])) + return X86_PMC_IDX_FIXED_CPU_CYCLES; + if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_BUS_CYCLES])) + return X86_PMC_IDX_FIXED_BUS_CYCLES; + return -1; } @@ -249,16 +309,39 @@ static int pmc_generic_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; + int idx; - /* Try to get the previous counter again */ - if (test_and_set_bit(idx, cpuc->used)) { - idx = find_first_zero_bit(cpuc->used, nr_counters_generic); - if (idx == nr_counters_generic) - return -EAGAIN; + idx = fixed_mode_idx(counter, hwc); + if (idx >= 0) { + /* + * Try to get the fixed counter, if that is already taken + * then try to get a generic counter: + */ + if (test_and_set_bit(idx, cpuc->used)) + goto try_generic; - set_bit(idx, cpuc->used); + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that counter_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: + */ + hwc->counter_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; hwc->idx = idx; + } else { + idx = hwc->idx; + /* Try to get the previous generic counter again */ + if (test_and_set_bit(idx, cpuc->used)) { +try_generic: + idx = find_first_zero_bit(cpuc->used, nr_counters_generic); + if (idx == nr_counters_generic) + return -EAGAIN; + + set_bit(idx, cpuc->used); + hwc->idx = idx; + } + hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; + hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; } perf_counters_lapic_init(hwc->nmi); @@ -266,6 +349,10 @@ static int pmc_generic_enable(struct perf_counter *counter) __pmc_generic_disable(counter, hwc, idx); cpuc->counters[idx] = counter; + /* + * Make it visible before enabling the hw: + */ + smp_wmb(); __hw_perf_counter_set_period(counter, hwc, idx); __pmc_generic_enable(counter, hwc, idx); @@ -275,7 +362,7 @@ static int pmc_generic_enable(struct perf_counter *counter) void perf_counter_print_debug(void) { - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left; + u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; struct cpu_hw_counters *cpuc; int cpu, idx; @@ -290,11 +377,13 @@ void perf_counter_print_debug(void) rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); printk(KERN_INFO "\n"); printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl); printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); + printk(KERN_INFO "CPU#%d: fixed: %016llx\n", cpu, fixed); printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { @@ -303,13 +392,19 @@ void perf_counter_print_debug(void) prev_left = per_cpu(prev_left[idx], cpu); - printk(KERN_INFO "CPU#%d: PMC%d ctrl: %016llx\n", + printk(KERN_INFO "CPU#%d: gen-PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl); - printk(KERN_INFO "CPU#%d: PMC%d count: %016llx\n", + printk(KERN_INFO "CPU#%d: gen-PMC%d count: %016llx\n", cpu, idx, pmc_count); - printk(KERN_INFO "CPU#%d: PMC%d left: %016llx\n", + printk(KERN_INFO "CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } + for (idx = 0; idx < nr_counters_fixed; idx++) { + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); + + printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n", + cpu, idx, pmc_count); + } local_irq_enable(); } @@ -323,6 +418,11 @@ static void pmc_generic_disable(struct perf_counter *counter) clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; + /* + * Make sure the cleared pointer becomes visible before we + * (potentially) free the counter: + */ + smp_wmb(); /* * Drain the remaining delta count out of a counter @@ -353,14 +453,11 @@ static void perf_save_and_restart(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; int idx = hwc->idx; - u64 pmc_ctrl; - - rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); x86_perf_counter_update(counter, hwc, idx); __hw_perf_counter_set_period(counter, hwc, idx); - if (pmc_ctrl & ARCH_PERFMON_EVENTSEL0_ENABLE) + if (counter->state == PERF_COUNTER_STATE_ACTIVE) __pmc_generic_enable(counter, hwc, idx); } @@ -373,6 +470,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) * Store sibling timestamps (if any): */ list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { + x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); perf_store_irq_data(sibling, counter->hw_event.type); perf_store_irq_data(sibling, atomic64_read(&counter->count)); @@ -403,7 +501,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) again: ack = status; - for_each_bit(bit, (unsigned long *) &status, nr_counters_generic) { + for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_counter *counter = cpuc->counters[bit]; clear_bit(bit, (unsigned long *) &status); @@ -561,6 +659,9 @@ void __init init_hw_perf_counters(void) perf_max_counters = nr_counters_generic; printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); + counter_value_mask = (1ULL << eax.split.bit_width) - 1; + printk(KERN_INFO "... value mask: %016Lx\n", counter_value_mask); + printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); nr_counters_fixed = edx.split.num_counters_fixed; -- cgit v0.10.2 From e44aef58ecfbe061eb4c53b939bcc35fb1bee82d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 25 Dec 2008 09:02:11 +0100 Subject: perfcounters: include asm/perf_counter.h only if CONFIG_PERF_COUNTERS=y Impact: build fix on ia64 KOSAKI Motohiro reported that -tip doesnt build on ia64 because asm/perf_counter.h only exists on x86 for now. Fix it. Reported-by: KOSAKI Motohiro Tested-by: KOSAKI Motohiro Acked-by: KOSAKI Motohiro Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ec77d16..cc3a75a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,7 +14,10 @@ #define _LINUX_PERF_COUNTER_H #include -#include + +#ifdef CONFIG_PERF_COUNTERS +# include +#endif #include #include -- cgit v0.10.2 From 01ea1ccaa24dea3552e103be13b7897211607a8b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 26 Dec 2008 21:05:06 -0800 Subject: perf_counter: more barrier in blank weak function Impact: fix panic possible panic Some versions of GCC inline the weak global function if it's empty. Add a barrier() to work it around. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d7a79f3..37f7716 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -45,8 +45,8 @@ hw_perf_counter_init(struct perf_counter *counter) } u64 __weak hw_perf_save_disable(void) { return 0; } -void __weak hw_perf_restore(u64 ctrl) { } -void __weak hw_perf_counter_setup(void) { } +void __weak hw_perf_restore(u64 ctrl) { barrier(); } +void __weak hw_perf_counter_setup(void) { barrier(); } static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) -- cgit v0.10.2 From 2b583d8bc8d7105b58d7481a4a0ceb718dac49c6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Dec 2008 19:15:43 +0530 Subject: x86: perf_counter remove unwanted hw_perf_enable_all Impact: clean, reduce kernel size a bit, avoid sparse warnings Fixes sparse warnings: arch/x86/kernel/cpu/perf_counter.c:153:6: warning: symbol 'hw_perf_enable_all' was not declared. Should it be static? arch/x86/kernel/cpu/perf_counter.c:279:3: warning: returning void-valued expression arch/x86/kernel/cpu/perf_counter.c:206:3: warning: returning void-valued expression arch/x86/kernel/cpu/perf_counter.c:206:3: warning: returning void-valued expression Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index da46eca..9376771 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -150,14 +150,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return 0; } -void hw_perf_enable_all(void) -{ - if (unlikely(!perf_counters_initialized)) - return; - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, perf_counter_mask); -} - u64 hw_perf_save_disable(void) { u64 ctrl; @@ -200,12 +192,10 @@ static inline void __pmc_generic_disable(struct perf_counter *counter, struct hw_perf_counter *hwc, unsigned int idx) { - int err; - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) - return __pmc_fixed_disable(counter, hwc, idx); - - err = wrmsr_safe(hwc->config_base + idx, hwc->config, 0); + __pmc_fixed_disable(counter, hwc, idx); + else + wrmsr_safe(hwc->config_base + idx, hwc->config, 0); } static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); @@ -276,10 +266,10 @@ __pmc_generic_enable(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) - return __pmc_fixed_enable(counter, hwc, idx); - - wrmsr(hwc->config_base + idx, - hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); + __pmc_fixed_enable(counter, hwc, idx); + else + wrmsr(hwc->config_base + idx, + hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); } static int -- cgit v0.10.2 From ff6f05416ece2caec1a7a1f8180d6598e0ab9272 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:19:25 +1100 Subject: perf_counter: Fix return value from dummy hw_perf_counter_init Impact: fix oops-causing bug Currently, if you try to use perf_counters on an architecture that has no hardware support, and you select an event that doesn't map to any of the defined software counters, you get an oops rather than an error. This is because the dummy hw_perf_counter_init returns ERR_PTR(-EINVAL) but the caller (perf_counter_alloc) only tests for NULL. This makes the dummy hw_perf_counter_init return NULL instead. Signed-off-by: Paul Mackerras diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 37f7716..4be1a8d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -41,7 +41,7 @@ static DEFINE_MUTEX(perf_resource_mutex); extern __weak const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter) { - return ERR_PTR(-EINVAL); + return NULL; } u64 __weak hw_perf_save_disable(void) { return 0; } -- cgit v0.10.2 From 9abf8a08bc8f18a3b125f834f00e2e71b49c15d2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:26:43 +1100 Subject: perf_counter: Fix the cpu_clock software counter Impact: bug fix Currently if you do (e.g.) timec -e -1 ls, it will report 0 for the value of the cpu_clock counter. The reason is that the core assumes that a counter's count field is up-to-date when the counter is inactive, and doesn't call the counter's read function. However, the cpu_clock counter code only updates the count in the read function. This fixes it by making both the read and disable functions update the count. It also makes the counter ignore time passing while the counter is disabled, by making the enable function update the hw.prev_count field. Signed-off-by: Paul Mackerras diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4be1a8d..b7a027a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -928,18 +928,32 @@ static const struct file_operations perf_fops = { static int cpu_clock_perf_counter_enable(struct perf_counter *counter) { + int cpu = raw_smp_processor_id(); + + atomic64_set(&counter->hw.prev_count, cpu_clock(cpu)); return 0; } +static void cpu_clock_perf_counter_update(struct perf_counter *counter) +{ + int cpu = raw_smp_processor_id(); + s64 prev; + u64 now; + + now = cpu_clock(cpu); + prev = atomic64_read(&counter->hw.prev_count); + atomic64_set(&counter->hw.prev_count, now); + atomic64_add(now - prev, &counter->count); +} + static void cpu_clock_perf_counter_disable(struct perf_counter *counter) { + cpu_clock_perf_counter_update(counter); } static void cpu_clock_perf_counter_read(struct perf_counter *counter) { - int cpu = raw_smp_processor_id(); - - atomic64_set(&counter->count, cpu_clock(cpu)); + cpu_clock_perf_counter_update(counter); } static const struct hw_perf_counter_ops perf_ops_cpu_clock = { -- cgit v0.10.2 From 3cbed429a9ccdb7a243f733b1056fe5c39e9004c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:43:42 +1100 Subject: perf_counter: Add optional hw_perf_group_sched_in arch function Impact: extend perf_counter infrastructure This adds an optional hw_perf_group_sched_in() arch function that enables a whole group of counters in one go. It returns 1 if it added the group successfully, 0 if it did nothing (and therefore the core needs to add the counters individually), or a negative number if an error occurred. It should add all the counters and enable any software counters in the group, or else add none of them and return an error. There are a couple of related changes/improvements in the group handling here: * As an optimization, group_sched_out() and group_sched_in() now check the state of the group leader, and do nothing if the leader is not active or disabled. * We now call hw_perf_save_disable/hw_perf_restore around the complete set of counter enable/disable calls in __perf_counter_sched_in/out, to give the arch code the opportunity to defer updating the hardware state until the hw_perf_restore call if it wants. * We no longer stop adding groups after we get to a group that has more than one counter. We will ultimately add an option for a group to be exclusive. The current code doesn't really implement exclusive groups anyway, since a group could end up going on with other counters that get added before it. Signed-off-by: Paul Mackerras diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index cc3a75a..b21d1ea 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -236,6 +236,9 @@ extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern int perf_counter_task_disable(void); extern int perf_counter_task_enable(void); +extern int hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu); #else static inline void diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b7a027a..9ad11e4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -47,6 +47,12 @@ hw_perf_counter_init(struct perf_counter *counter) u64 __weak hw_perf_save_disable(void) { return 0; } void __weak hw_perf_restore(u64 ctrl) { barrier(); } void __weak hw_perf_counter_setup(void) { barrier(); } +int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu) +{ + return 0; +} static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) @@ -341,6 +347,9 @@ group_sched_out(struct perf_counter *group_counter, { struct perf_counter *counter; + if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) + return; + counter_sched_out(group_counter, cpuctx, ctx); /* @@ -354,15 +363,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx) { struct perf_counter *counter; + u64 flags; if (likely(!ctx->nr_counters)) return; spin_lock(&ctx->lock); + flags = hw_perf_save_disable(); if (ctx->nr_active) { list_for_each_entry(counter, &ctx->counter_list, list_entry) group_sched_out(counter, cpuctx, ctx); } + hw_perf_restore(flags); spin_unlock(&ctx->lock); } @@ -402,7 +414,14 @@ group_sched_in(struct perf_counter *group_counter, int cpu) { struct perf_counter *counter, *partial_group; - int ret = 0; + int ret; + + if (group_counter->state == PERF_COUNTER_STATE_OFF) + return 0; + + ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu); + if (ret) + return ret < 0 ? ret : 0; if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) return -EAGAIN; @@ -415,10 +434,9 @@ group_sched_in(struct perf_counter *group_counter, partial_group = counter; goto group_error; } - ret = -EAGAIN; } - return ret; + return 0; group_error: /* @@ -440,11 +458,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx, int cpu) { struct perf_counter *counter; + u64 flags; if (likely(!ctx->nr_counters)) return; spin_lock(&ctx->lock); + flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { /* * Listen to the 'cpu' scheduling filter constraint @@ -454,12 +474,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, continue; /* - * If we scheduled in a group atomically and - * exclusively, break out: + * If we scheduled in a group atomically and exclusively, + * or if this group can't go on, break out: */ if (group_sched_in(counter, cpuctx, ctx, cpu)) break; } + hw_perf_restore(flags); spin_unlock(&ctx->lock); } -- cgit v0.10.2 From 4eb96fcfe07b7f2a05577e57533840f8e26bea53 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 17:24:34 +1100 Subject: perf_counter: Add dummy perf_counter_print_debug function Impact: minimize requirements on architectures Currently, an architecture just enabling CONFIG_PERF_COUNTERS but not providing any extra functions will fail to build with perf_counter_print_debug being undefined, since we don't provide an empty dummy definition like we do with the hw_perf_* functions. This provides an empty dummy perf_counter_print_debug() to make it easier for architectures to turn on CONFIG_PERF_COUNTERS. Signed-off-by: Paul Mackerras diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 9ad11e4..4c0dccb 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -54,6 +54,8 @@ int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, return 0; } +void __weak perf_counter_print_debug(void) { } + static void list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) { -- cgit v0.10.2 From d662ed26734473d4cb5f3d78cebfec8f9126e97c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 17:01:53 +1100 Subject: powerpc/perf_counter: Add perf_counter system call on powerpc ... with an empty/dummy asm/perf_counter.h so it builds. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h new file mode 100644 index 0000000..59530ae --- /dev/null +++ b/arch/powerpc/include/asm/perf_counter.h @@ -0,0 +1,10 @@ +/* + * Performance counter support - PowerPC-specific definitions. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 803def23..da300c4 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,3 +322,4 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) +SYSCALL(perf_counter_open) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index e07d0c7..7cef5af 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -341,10 +341,11 @@ #define __NR_dup3 316 #define __NR_pipe2 317 #define __NR_inotify_init1 318 +#define __NR_perf_counter_open 319 #ifdef __KERNEL__ -#define __NR_syscalls 319 +#define __NR_syscalls 320 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 3d0c776..94dd1fb 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -1,6 +1,7 @@ config PPC64 bool "64-bit kernel" default n + select HAVE_PERF_COUNTERS help This option selects whether a 32-bit or a 64-bit kernel will be built. -- cgit v0.10.2 From 93a6d3ce6962044fe9badf528fed46b455d58292 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 16:52:19 +1100 Subject: powerpc: Provide a way to defer perf counter work until interrupts are enabled Because 64-bit powerpc uses lazy (soft) interrupt disabling, it is possible for a performance monitor exception to come in when the kernel thinks interrupts are disabled (i.e. when they are soft-disabled but hard-enabled). In such a situation the performance monitor exception handler might have some processing to do (such as process wakeups) which can't be done in what is effectively an NMI handler. This provides a way to defer that work until interrupts get enabled, either in raw_local_irq_restore() or by returning from an interrupt handler to code that had interrupts enabled. We have a per-processor flag that indicates that there is work pending to do when interrupts subsequently get re-enabled. This flag is checked in the interrupt return path and in raw_local_irq_restore(), and if it is set, perf_counter_do_pending() is called to do the pending work. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index f75a5fc..e10f151 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -131,5 +131,36 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct hw_interrupt_type; +#ifdef CONFIG_PERF_COUNTERS +static inline unsigned long get_perf_counter_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_counter_pending))); + return x; +} + +static inline void set_perf_counter_pending(int x) +{ + asm volatile("stb %0,%1(13)" : : + "r" (x), + "i" (offsetof(struct paca_struct, perf_counter_pending))); +} + +extern void perf_counter_do_pending(void); + +#else + +static inline unsigned long get_perf_counter_pending(void) +{ + return 0; +} + +static inline void set_perf_counter_pending(int x) {} +static inline void perf_counter_do_pending(void) {} +#endif /* CONFIG_PERF_COUNTERS */ + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 082b3ae..6ef0557 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -99,6 +99,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ + u8 perf_counter_pending; /* PM interrupt while soft-disabled */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 661d07d..cea4629 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -127,6 +127,7 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); + DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 383ed6e..f30b4e5 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); +#ifdef CONFIG_PERF_COUNTERS + /* check paca->perf_counter_pending if we're enabling ints */ + lbz r3,PACAPERFPEND(r13) + and. r3,r3,r5 + beq 27f + bl .perf_counter_do_pending +27: +#endif /* CONFIG_PERF_COUNTERS */ + /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index ac222d0..4efb886 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,6 +104,13 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } +#ifdef CONFIG_PERF_COUNTERS +notrace void __weak perf_counter_do_pending(void) +{ + set_perf_counter_pending(0); +} +#endif + notrace void raw_local_irq_restore(unsigned long en) { /* @@ -135,6 +142,9 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } + if (get_perf_counter_pending()) + perf_counter_do_pending(); + /* * if (get_paca()->hard_enabled) return; * But again we need to take care that gcc gets hard_enabled directly -- cgit v0.10.2 From 4574910e5087085a1f330ff8373cee4503f5c77c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 20:21:55 +1100 Subject: powerpc/perf_counter: Add generic support for POWER-family PMU hardware This provides the architecture-specific functions needed to access PMU hardware on the 64-bit PowerPC processors. It has been designed for the IBM POWER family (POWER 4/4+/5/5+/6 and PPC970) but will hopefully also suit other 64-bit PowerPC machines (although probably not Cell given how different it is in this area). This doesn't include back-ends for any specific processors. This implements a system which allows back-ends to express the constraints that their hardware has on what events can be counted simultaneously. The constraints are expressed as a 64-bit mask + 64-bit value for each event, and the encoding is capable of expressing the constraints arising from having a set of multiplexers feeding an event bus, with some events being available through multiple multiplexer settings, such as we get on POWER4 and PPC970. Furthermore, the back-end can supply alternative event codes for each event, and the constraint checking code will try all possible combinations of alternative event codes to try to find a combination that will fit. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 59530ae..9d7ff6d 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -8,3 +8,65 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#include + +#define MAX_HWCOUNTERS 8 +#define MAX_EVENT_ALTERNATIVES 8 + +/* + * This struct provides the constants and functions needed to + * describe the PMU on a particular POWER-family CPU. + */ +struct power_pmu { + int n_counter; + int max_alternatives; + u64 add_fields; + u64 test_adder; + int (*compute_mmcr)(unsigned int events[], int n_ev, + unsigned int hwc[], u64 mmcr[]); + int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); + int (*get_alternatives)(unsigned int event, unsigned int alt[]); + void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); + int n_generic; + int *generic_events; +}; + +extern struct power_pmu *ppmu; + +/* + * The power_pmu.get_constraint function returns a 64-bit value and + * a 64-bit mask that express the constraints between this event and + * other events. + * + * The value and mask are divided up into (non-overlapping) bitfields + * of three different types: + * + * Select field: this expresses the constraint that some set of bits + * in MMCR* needs to be set to a specific value for this event. For a + * select field, the mask contains 1s in every bit of the field, and + * the value contains a unique value for each possible setting of the + * MMCR* bits. The constraint checking code will ensure that two events + * that set the same field in their masks have the same value in their + * value dwords. + * + * Add field: this expresses the constraint that there can be at most + * N events in a particular class. A field of k bits can be used for + * N <= 2^(k-1) - 1. The mask has the most significant bit of the field + * set (and the other bits 0), and the value has only the least significant + * bit of the field set. In addition, the 'add_fields' and 'test_adder' + * in the struct power_pmu for this processor come into play. The + * add_fields value contains 1 in the LSB of the field, and the + * test_adder contains 2^(k-1) - 1 - N in the field. + * + * NAND field: this expresses the constraint that you may not have events + * in all of a set of classes. (For example, on PPC970, you can't select + * events from the FPU, ISU and IDU simultaneously, although any two are + * possible.) For N classes, the field is N+1 bits wide, and each class + * is assigned one bit from the least-significant N bits. The mask has + * only the most-significant bit set, and the value has only the bit + * for the event's class set. The test_adder has the least significant + * bit set in the field. + * + * If an event is not subject to the constraint expressed by a particular + * field, then it will have 0 in both the mask and value for that field. + */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1308a86..fde190b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,6 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c new file mode 100644 index 0000000..c7d4c29 --- /dev/null +++ b/arch/powerpc/kernel/perf_counter.c @@ -0,0 +1,754 @@ +/* + * Performance counter support - powerpc architecture code + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +struct cpu_hw_counters { + int n_counters; + int n_percpu; + int disabled; + int n_added; + struct perf_counter *counter[MAX_HWCOUNTERS]; + unsigned int events[MAX_HWCOUNTERS]; + u64 mmcr[3]; +}; +DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); + +struct power_pmu *ppmu; + +void perf_counter_print_debug(void) +{ +} + +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return !counter->hw_event.raw && counter->hw_event.type < 0; +} + +/* + * Read one performance monitor counter (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 1: + val = mfspr(SPRN_PMC1); + break; + case 2: + val = mfspr(SPRN_PMC2); + break; + case 3: + val = mfspr(SPRN_PMC3); + break; + case 4: + val = mfspr(SPRN_PMC4); + break; + case 5: + val = mfspr(SPRN_PMC5); + break; + case 6: + val = mfspr(SPRN_PMC6); + break; + case 7: + val = mfspr(SPRN_PMC7); + break; + case 8: + val = mfspr(SPRN_PMC8); + break; + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 1: + mtspr(SPRN_PMC1, val); + break; + case 2: + mtspr(SPRN_PMC2, val); + break; + case 3: + mtspr(SPRN_PMC3, val); + break; + case 4: + mtspr(SPRN_PMC4, val); + break; + case 5: + mtspr(SPRN_PMC5, val); + break; + case 6: + mtspr(SPRN_PMC6, val); + break; + case 7: + mtspr(SPRN_PMC7, val); + break; + case 8: + mtspr(SPRN_PMC8, val); + break; + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } +} + +/* + * Check if a set of events can all go on the PMU at once. + * If they can't, this will look at alternative codes for the events + * and see if any combination of alternative codes is feasible. + * The feasible set is returned in event[]. + */ +static int power_check_constraints(unsigned int event[], int n_ev) +{ + u64 mask, value, nv; + unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; + u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; + int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; + int i, j; + u64 addf = ppmu->add_fields; + u64 tadd = ppmu->test_adder; + + if (n_ev > ppmu->n_counter) + return -1; + + /* First see if the events will go on as-is */ + for (i = 0; i < n_ev; ++i) { + alternatives[i][0] = event[i]; + if (ppmu->get_constraint(event[i], &amasks[i][0], + &avalues[i][0])) + return -1; + choice[i] = 0; + } + value = mask = 0; + for (i = 0; i < n_ev; ++i) { + nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); + if ((((nv + tadd) ^ value) & mask) != 0 || + (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) + break; + value = nv; + mask |= amasks[i][0]; + } + if (i == n_ev) + return 0; /* all OK */ + + /* doesn't work, gather alternatives... */ + if (!ppmu->get_alternatives) + return -1; + for (i = 0; i < n_ev; ++i) { + n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]); + for (j = 1; j < n_alt[i]; ++j) + ppmu->get_constraint(alternatives[i][j], + &amasks[i][j], &avalues[i][j]); + } + + /* enumerate all possibilities and see if any will work */ + i = 0; + j = -1; + value = mask = nv = 0; + while (i < n_ev) { + if (j >= 0) { + /* we're backtracking, restore context */ + value = svalues[i]; + mask = smasks[i]; + j = choice[i]; + } + /* + * See if any alternative k for event i, + * where k > j, will satisfy the constraints. + */ + while (++j < n_alt[i]) { + nv = (value | avalues[i][j]) + + (value & avalues[i][j] & addf); + if ((((nv + tadd) ^ value) & mask) == 0 && + (((nv + tadd) ^ avalues[i][j]) + & amasks[i][j]) == 0) + break; + } + if (j >= n_alt[i]) { + /* + * No feasible alternative, backtrack + * to event i-1 and continue enumerating its + * alternatives from where we got up to. + */ + if (--i < 0) + return -1; + } else { + /* + * Found a feasible alternative for event i, + * remember where we got up to with this event, + * go on to the next event, and start with + * the first alternative for it. + */ + choice[i] = j; + svalues[i] = value; + smasks[i] = mask; + value = nv; + mask |= amasks[i][j]; + ++i; + j = -1; + } + } + + /* OK, we have a feasible combination, tell the caller the solution */ + for (i = 0; i < n_ev; ++i) + event[i] = alternatives[i][choice[i]]; + return 0; +} + +static void power_perf_read(struct perf_counter *counter) +{ + long val, delta, prev; + + if (!counter->hw.idx) + return; + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&counter->hw.prev_count); + barrier(); + val = read_pmc(counter->hw.idx); + } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); + + /* The counters are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &counter->count); + atomic64_sub(delta, &counter->hw.period_left); +} + +/* + * Disable all counters to prevent PMU interrupts and to allow + * counters to be added or removed. + */ +u64 hw_perf_save_disable(void) +{ + struct cpu_hw_counters *cpuhw; + unsigned long ret; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_counters); + + ret = cpuhw->disabled; + if (!ret) { + cpuhw->disabled = 1; + cpuhw->n_added = 0; + + /* + * Set the 'freeze counters' bit. + * The barrier is to make sure the mtspr has been + * executed and the PMU has frozen the counters + * before we return. + */ + mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + mb(); + } + local_irq_restore(flags); + return ret; +} + +/* + * Re-enable all counters if disable == 0. + * If we were previously disabled and counters were added, then + * put the new config on the PMU. + */ +void hw_perf_restore(u64 disable) +{ + struct perf_counter *counter; + struct cpu_hw_counters *cpuhw; + unsigned long flags; + long i; + unsigned long val; + s64 left; + unsigned int hwc_index[MAX_HWCOUNTERS]; + + if (disable) + return; + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_counters); + cpuhw->disabled = 0; + + /* + * If we didn't change anything, or only removed counters, + * no need to recalculate MMCR* settings and reset the PMCs. + * Just reenable the PMU with the current MMCR* settings + * (possibly updated for removal of counters). + */ + if (!cpuhw->n_added) { + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + goto out; + } + + /* + * Compute MMCR* values for the new set of counters + */ + if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, + cpuhw->mmcr)) { + /* shouldn't ever get here */ + printk(KERN_ERR "oops compute_mmcr failed\n"); + goto out; + } + + /* + * Write the new configuration to MMCR* with the freeze + * bit set and set the hardware counters to their initial values. + * Then unfreeze the counters. + */ + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); + mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) + | MMCR0_FC); + + /* + * Read off any pre-existing counters that need to move + * to another PMC. + */ + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { + power_perf_read(counter); + write_pmc(counter->hw.idx, 0); + counter->hw.idx = 0; + } + } + + /* + * Initialize the PMCs for all the new and moved counters. + */ + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + if (counter->hw.idx) + continue; + val = 0; + if (counter->hw_event.irq_period) { + left = atomic64_read(&counter->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&counter->hw.prev_count, val); + counter->hw.idx = hwc_index[i] + 1; + write_pmc(counter->hw.idx, val); + } + mb(); + cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_counter *group, int max_count, + struct perf_counter *ctrs[], unsigned int *events) +{ + int n = 0; + struct perf_counter *counter; + + if (!is_software_counter(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + events[n++] = group->hw.config; + } + list_for_each_entry(counter, &group->sibling_list, list_entry) { + if (!is_software_counter(counter) && + counter->state != PERF_COUNTER_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = counter; + events[n++] = counter->hw.config; + } + } + return n; +} + +static void counter_sched_in(struct perf_counter *counter, int cpu) +{ + counter->state = PERF_COUNTER_STATE_ACTIVE; + counter->oncpu = cpu; + if (is_software_counter(counter)) + counter->hw_ops->enable(counter); +} + +/* + * Called to enable a whole group of counters. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + */ +int hw_perf_group_sched_in(struct perf_counter *group_leader, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx, int cpu) +{ + struct cpu_hw_counters *cpuhw; + long i, n, n0; + struct perf_counter *sub; + + cpuhw = &__get_cpu_var(cpu_hw_counters); + n0 = cpuhw->n_counters; + n = collect_events(group_leader, ppmu->n_counter - n0, + &cpuhw->counter[n0], &cpuhw->events[n0]); + if (n < 0) + return -EAGAIN; + if (power_check_constraints(cpuhw->events, n + n0)) + return -EAGAIN; + cpuhw->n_counters = n0 + n; + cpuhw->n_added += n; + + /* + * OK, this group can go on; update counter states etc., + * and enable any software counters + */ + for (i = n0; i < n0 + n; ++i) + cpuhw->counter[i]->hw.config = cpuhw->events[i]; + n = 1; + counter_sched_in(group_leader, cpu); + list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { + if (sub->state != PERF_COUNTER_STATE_OFF) { + counter_sched_in(sub, cpu); + ++n; + } + } + cpuctx->active_oncpu += n; + ctx->nr_active += n; + + return 1; +} + +/* + * Add a counter to the PMU. + * If all counters are not already frozen, then we disable and + * re-enable the PMU in order to get hw_perf_restore to do the + * actual work of reconfiguring the PMU. + */ +static int power_perf_enable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuhw; + unsigned long flags; + u64 pmudis; + int n0; + int ret = -EAGAIN; + + local_irq_save(flags); + pmudis = hw_perf_save_disable(); + + /* + * Add the counter to the list (if there is room) + * and check whether the total set is still feasible. + */ + cpuhw = &__get_cpu_var(cpu_hw_counters); + n0 = cpuhw->n_counters; + if (n0 >= ppmu->n_counter) + goto out; + cpuhw->counter[n0] = counter; + cpuhw->events[n0] = counter->hw.config; + if (power_check_constraints(cpuhw->events, n0 + 1)) + goto out; + + counter->hw.config = cpuhw->events[n0]; + ++cpuhw->n_counters; + ++cpuhw->n_added; + + ret = 0; + out: + hw_perf_restore(pmudis); + local_irq_restore(flags); + return ret; +} + +/* + * Remove a counter from the PMU. + */ +static void power_perf_disable(struct perf_counter *counter) +{ + struct cpu_hw_counters *cpuhw; + long i; + u64 pmudis; + unsigned long flags; + + local_irq_save(flags); + pmudis = hw_perf_save_disable(); + + power_perf_read(counter); + + cpuhw = &__get_cpu_var(cpu_hw_counters); + for (i = 0; i < cpuhw->n_counters; ++i) { + if (counter == cpuhw->counter[i]) { + while (++i < cpuhw->n_counters) + cpuhw->counter[i-1] = cpuhw->counter[i]; + --cpuhw->n_counters; + ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); + write_pmc(counter->hw.idx, 0); + counter->hw.idx = 0; + break; + } + } + if (cpuhw->n_counters == 0) { + /* disable exceptions if no counters are running */ + cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); + } + + hw_perf_restore(pmudis); + local_irq_restore(flags); +} + +struct hw_perf_counter_ops power_perf_ops = { + .enable = power_perf_enable, + .disable = power_perf_disable, + .read = power_perf_read +}; + +const struct hw_perf_counter_ops * +hw_perf_counter_init(struct perf_counter *counter) +{ + unsigned long ev; + struct perf_counter *ctrs[MAX_HWCOUNTERS]; + unsigned int events[MAX_HWCOUNTERS]; + int n; + + if (!ppmu) + return NULL; + if ((s64)counter->hw_event.irq_period < 0) + return NULL; + ev = counter->hw_event.type; + if (!counter->hw_event.raw) { + if (ev >= ppmu->n_generic || + ppmu->generic_events[ev] == 0) + return NULL; + ev = ppmu->generic_events[ev]; + } + counter->hw.config_base = ev; + counter->hw.idx = 0; + + /* + * If this is in a group, check if it can go on with all the + * other hardware counters in the group. We assume the counter + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (counter->group_leader != counter) { + n = collect_events(counter->group_leader, ppmu->n_counter - 1, + ctrs, events); + if (n < 0) + return NULL; + } + events[n++] = ev; + if (power_check_constraints(events, n)) + return NULL; + + counter->hw.config = events[n - 1]; + atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); + return &power_perf_ops; +} + +/* + * Handle wakeups. + */ +void perf_counter_do_pending(void) +{ + int i; + struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); + struct perf_counter *counter; + + set_perf_counter_pending(0); + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + if (counter && counter->wakeup_pending) { + counter->wakeup_pending = 0; + wake_up(&counter->waitq); + } + } +} + +/* + * Record data for an irq counter. + * This function was lifted from the x86 code; maybe it should + * go in the core? + */ +static void perf_store_irq_data(struct perf_counter *counter, u64 data) +{ + struct perf_data *irqdata = counter->irqdata; + + if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { + irqdata->overrun++; + } else { + u64 *p = (u64 *) &irqdata->data[irqdata->len]; + + *p = data; + irqdata->len += sizeof(u64); + } +} + +/* + * Record all the values of the counters in a group + */ +static void perf_handle_group(struct perf_counter *counter) +{ + struct perf_counter *leader, *sub; + + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); + perf_store_irq_data(counter, sub->hw_event.type); + perf_store_irq_data(counter, atomic64_read(&sub->count)); + } +} + +/* + * A counter has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_counter *counter, long val, + struct pt_regs *regs) +{ + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&counter->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &counter->count); + + /* + * See if the total period for this counter has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&counter->hw.period_left) - delta; + if (counter->hw_event.irq_period) { + if (left <= 0) { + left += counter->hw_event.irq_period; + if (left <= 0) + left = counter->hw_event.irq_period; + record = 1; + } + if (left < 0x80000000L) + val = 0x80000000L - left; + } + write_pmc(counter->hw.idx, val); + atomic64_set(&counter->hw.prev_count, val); + atomic64_set(&counter->hw.period_left, left); + + /* + * Finally record data if requested. + */ + if (record) { + switch (counter->hw_event.record_type) { + case PERF_RECORD_SIMPLE: + break; + case PERF_RECORD_IRQ: + perf_store_irq_data(counter, instruction_pointer(regs)); + counter->wakeup_pending = 1; + break; + case PERF_RECORD_GROUP: + perf_handle_group(counter); + counter->wakeup_pending = 1; + break; + } + } +} + +/* + * Performance monitor interrupt stuff + */ +static void perf_counter_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); + struct perf_counter *counter; + long val; + int need_wakeup = 0, found = 0; + + for (i = 0; i < cpuhw->n_counters; ++i) { + counter = cpuhw->counter[i]; + val = read_pmc(counter->hw.idx); + if ((int)val < 0) { + /* counter has overflowed */ + found = 1; + record_and_restart(counter, val, regs); + if (counter->wakeup_pending) + need_wakeup = 1; + } + } + + /* + * In case we didn't find and reset the counter that caused + * the interrupt, scan all counters and reset any that are + * negative, to avoid getting continual interrupts. + * Any that we processed in the previous loop will not be negative. + */ + if (!found) { + for (i = 0; i < ppmu->n_counter; ++i) { + val = read_pmc(i + 1); + if ((int)val < 0) + write_pmc(i + 1, 0); + } + } + + /* + * Reset MMCR0 to its normal value. This will set PMXE and + * clear FC (freeze counters) and PMAO (perf mon alert occurred) + * and thus allow interrupts to occur again. + * XXX might want to use MSR.PM to keep the counters frozen until + * we get back out of this interrupt. + */ + mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + + /* + * If we need a wakeup, check whether interrupts were soft-enabled + * when we took the interrupt. If they were, we can wake stuff up + * immediately; otherwise we'll have to set a flag and do the + * wakeup when interrupts get soft-enabled. + */ + if (need_wakeup) { + if (regs->softe) { + irq_enter(); + perf_counter_do_pending(); + irq_exit(); + } else { + set_perf_counter_pending(1); + } + } +} + +static int init_perf_counters(void) +{ + if (reserve_pmc_hardware(perf_counter_interrupt)) { + printk(KERN_ERR "Couldn't init performance monitor subsystem\n"); + return -EBUSY; + } + + return 0; +} + +arch_initcall(init_perf_counters); -- cgit v0.10.2 From 16b067993dee3dfde61b20027e0b168dc06201ee Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 10 Jan 2009 16:34:07 +1100 Subject: powerpc/perf_counter: Add support for PPC970 family This adds the back-end for the PMU on the PPC970 family. The PPC970 allows events from the ISU to be selected in two different ways. Rather than use alternative event codes to express this, we instead use a single encoding for ISU events and express the resulting constraint (that you can't select events from all three of FPU/IFU/VPU, ISU and IDU/STS at the same time, since they all come in through only 2 multiplexers) using a NAND constraint field, and work out which multiplexer is used for ISU events at compute_mmcr time. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fde190b..45798f6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index c7d4c29..5561ecb 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -741,13 +741,26 @@ static void perf_counter_interrupt(struct pt_regs *regs) } } +extern struct power_pmu ppc970_pmu; + static int init_perf_counters(void) { + unsigned long pvr; + if (reserve_pmc_hardware(perf_counter_interrupt)) { printk(KERN_ERR "Couldn't init performance monitor subsystem\n"); return -EBUSY; } + /* XXX should get this from cputable */ + pvr = mfspr(SPRN_PVR); + switch (PVR_VER(pvr)) { + case PV_970: + case PV_970FX: + case PV_970MP: + ppmu = &ppc970_pmu; + break; + } return 0; } diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c new file mode 100644 index 0000000..c325658 --- /dev/null +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -0,0 +1,375 @@ +/* + * Performance counter support for PPC970-family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for PPC970 + */ +#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_BYTE_SH 4 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 3 +#define PM_PMCSEL_MSK 0xf + +/* Values in PM_UNIT field */ +#define PM_NONE 0 +#define PM_FPU 1 +#define PM_VPU 2 +#define PM_ISU 3 +#define PM_IFU 4 +#define PM_IDU 5 +#define PM_STS 6 +#define PM_LSU0 7 +#define PM_LSU1U 8 +#define PM_LSU1L 9 +#define PM_LASTUNIT 9 + +/* + * Bits in MMCR0 for PPC970 + */ +#define MMCR0_PMC1SEL_SH 8 +#define MMCR0_PMC2SEL_SH 1 +#define MMCR_PMCSEL_MSK 0x1f + +/* + * Bits in MMCR1 for PPC970 + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTM1SEL_SH 59 +#define MMCR1_TTM3SEL_SH 53 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 50 +#define MMCR1_TD_CP_DBG1SEL_SH 48 +#define MMCR1_TD_CP_DBG2SEL_SH 46 +#define MMCR1_TD_CP_DBG3SEL_SH 44 +#define MMCR1_PMC1_ADDER_SEL_SH 39 +#define MMCR1_PMC2_ADDER_SEL_SH 38 +#define MMCR1_PMC6_ADDER_SEL_SH 37 +#define MMCR1_PMC5_ADDER_SEL_SH 36 +#define MMCR1_PMC8_ADDER_SEL_SH 35 +#define MMCR1_PMC7_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC3SEL_SH 27 +#define MMCR1_PMC4SEL_SH 22 +#define MMCR1_PMC5SEL_SH 17 +#define MMCR1_PMC6SEL_SH 12 +#define MMCR1_PMC7SEL_SH 7 +#define MMCR1_PMC8SEL_SH 2 + +static short mmcr1_adder_bits[8] = { + MMCR1_PMC1_ADDER_SEL_SH, + MMCR1_PMC2_ADDER_SEL_SH, + MMCR1_PMC3_ADDER_SEL_SH, + MMCR1_PMC4_ADDER_SEL_SH, + MMCR1_PMC5_ADDER_SEL_SH, + MMCR1_PMC6_ADDER_SEL_SH, + MMCR1_PMC7_ADDER_SEL_SH, + MMCR1_PMC8_ADDER_SEL_SH +}; + +/* + * Bits in MMCRA + */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * <><>[ >[ >[ >< >< >< >< ><><><><><><><><> + * T0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * + * T0 - TTM0 constraint + * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 + * + * T1 - TTM1 constraint + * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 + * + * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS + * 43: UC3 error 0x0800_0000_0000 + * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 + * 41: ISU events needed 0x0200_0000_0000 + * 40: IDU|STS events needed 0x0100_0000_0000 + * + * PS1 + * 39: PS1 error 0x0080_0000_0000 + * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 + * + * PS2 + * 35: PS2 error 0x0008_0000_0000 + * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 + * + * B0 + * 28-31: Byte 0 event source 0xf000_0000 + * Encoding as for the event code + * + * B1, B2, B3 + * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources + * + * P1 + * 15: P1 error 0x8000 + * 14-15: Count of events needing PMC1 + * + * P2..P8 + * 0-13: Count of events needing PMC2..PMC8 + */ + +/* Masks and values for using events from the various units */ +static u64 unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, + [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, + [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, + [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull }, + [PM_IDU] = { 0x380000000000ull, 0x010000000000ull }, + [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, +}; + +static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, sh; + u64 mask = 0, value = 0; + int grp = -1; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 8) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + grp = ((pmc - 1) >> 1) & 1; + } + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit) { + if (unit > PM_LASTUNIT) + return -1; + mask |= unit_cons[unit][0]; + value |= unit_cons[unit][1]; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + /* + * Bus events on bytes 0 and 2 can be counted + * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. + */ + if (!pmc) + grp = byte & 1; + /* Set byte lane select field */ + mask |= 0xfULL << (28 - 4 * byte); + value |= (u64)unit << (28 - 4 * byte); + } + if (grp == 0) { + /* increment PMC1/2/5/6 field */ + mask |= 0x8000000000ull; + value |= 0x1000000000ull; + } else if (grp == 1) { + /* increment PMC3/4/7/8 field */ + mask |= 0x800000000ull; + value |= 0x100000000ull; + } + *maskp = mask; + *valp = value; + return 0; +} + +static int p970_get_alternatives(unsigned int event, unsigned int alt[]) +{ + alt[0] = event; + + /* 2 alternatives for LSU empty */ + if (event == 0x2002 || event == 0x3002) { + alt[1] = event ^ 0x1000; + return 2; + } + + return 1; +} + +static int p970_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned int pmc, unit, byte, psel; + unsigned int ttm, grp; + unsigned int pmc_inuse = 0; + unsigned int pmc_grp_use[2]; + unsigned char busbyte[4]; + unsigned char unituse[16]; + unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 }; + unsigned char ttmuse[2]; + unsigned char pmcsel[8]; + int i; + + if (n_ev > 8) + return -1; + + /* First pass to count resource use */ + pmc_grp_use[0] = pmc_grp_use[1] = 0; + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + /* count 1/2/5/6 vs 3/4/7/8 use */ + ++pmc_grp_use[((pmc - 1) >> 1) & 1]; + } + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit) { + if (unit > PM_LASTUNIT) + return -1; + if (!pmc) + ++pmc_grp_use[byte & 1]; + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + unituse[unit] = 1; + } + } + if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) + return -1; + + /* + * Assign resources and set multiplexer selects. + * + * PM_ISU can go either on TTM0 or TTM1, but that's the only + * choice we have to deal with. + */ + if (unituse[PM_ISU] & + (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU])) + unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */ + /* Set TTM[01]SEL fields. */ + ttmuse[0] = ttmuse[1] = 0; + for (i = PM_FPU; i <= PM_STS; ++i) { + if (!unituse[i]) + continue; + ttm = unitmap[i]; + ++ttmuse[(ttm >> 2) & 1]; + mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; + } + /* Check only one unit per TTMx */ + if (ttmuse[0] > 1 || ttmuse[1] > 1) + return -1; + + /* Set byte lane select fields and TTM3SEL. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit <= PM_STS) + ttm = (unitmap[unit] >> 2) & 1; + else if (unit == PM_LSU0) + ttm = 2; + else { + ttm = 3; + if (unit == PM_LSU1L && byte >= 2) + mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + } + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + if (!pmc) { + /* Bus event or any-PMC direct event */ + if (unit) + psel |= 0x10 | ((byte & 2) << 2); + else + psel |= 8; + for (pmc = 0; pmc < 8; ++pmc) { + if (pmc_inuse & (1 << pmc)) + continue; + grp = (pmc >> 1) & 1; + if (unit) { + if (grp == (byte & 1)) + break; + } else if (pmc_grp_use[grp] < 4) { + ++pmc_grp_use[grp]; + break; + } + } + pmc_inuse |= 1 << pmc; + } else { + /* Direct event */ + --pmc; + if (psel == 0 && (byte & 2)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; + } + pmcsel[pmc] = psel; + hwc[i] = pmc; + } + for (pmc = 0; pmc < 2; ++pmc) + mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); + for (; pmc < 8; ++pmc) + mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); + if (pmc_inuse & 1) + mmcr0 |= MMCR0_PMC1CE; + if (pmc_inuse & 0xfe) + mmcr0 |= MMCR0_PMCjCE; + + mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ + + /* Return MMCRx values */ + mmcr[0] = mmcr0; + mmcr[1] = mmcr1; + mmcr[2] = mmcra; + return 0; +} + +static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + int shift, i; + + if (pmc <= 1) { + shift = MMCR0_PMC1SEL_SH - 7 * pmc; + i = 0; + } else { + shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); + i = 1; + } + /* + * Setting the PMCxSEL field to 0x08 disables PMC x. + */ + mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); +} + +static int ppc970_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 7, + [PERF_COUNT_INSTRUCTIONS] = 1, + [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ +}; + +struct power_pmu ppc970_pmu = { + .n_counter = 8, + .max_alternatives = 2, + .add_fields = 0x001100005555ull, + .test_adder = 0x013300000000ull, + .compute_mmcr = p970_compute_mmcr, + .get_constraint = p970_get_constraint, + .get_alternatives = p970_get_alternatives, + .disable_pmc = p970_disable_pmc, + .n_generic = ARRAY_SIZE(ppc970_generic_events), + .generic_events = ppc970_generic_events, +}; -- cgit v0.10.2 From f78628374a13bc150db77c6e02d4f2c0a7f932ef Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 9 Jan 2009 21:05:35 +1100 Subject: powerpc/perf_counter: Add support for POWER6 This adds the back-end for the PMU on the POWER6 processor. Fortunately, the event selection hardware is somewhat simpler on POWER6 than on other POWER family processors, so the constraints fit into only 32 bits. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 45798f6..0ebf4d0 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5561ecb..df3fe05 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -742,6 +742,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) } extern struct power_pmu ppc970_pmu; +extern struct power_pmu power6_pmu; static int init_perf_counters(void) { @@ -760,6 +761,9 @@ static int init_perf_counters(void) case PV_970MP: ppmu = &ppc970_pmu; break; + case 0x3e: + ppmu = &power6_pmu; + break; } return 0; } diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c new file mode 100644 index 0000000..b1f61f3 --- /dev/null +++ b/arch/powerpc/kernel/power6-pmu.c @@ -0,0 +1,283 @@ +/* + * Performance counter support for POWER6 processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER6 + */ +#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0x7 +#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) +#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */ +#define PM_UNIT_MSK 0xf +#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH) +#define PM_LLAV 0x8000 /* Load lookahead match value */ +#define PM_LLA 0x4000 /* Load lookahead match enable */ +#define PM_BYTE_SH 12 /* Byte of event bus to use */ +#define PM_BYTE_MSK 3 +#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */ +#define PM_SUBUNIT_MSK 7 +#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH) +#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */ +#define PM_BUSEVENT_MSK 0xf3700 + +/* + * Bits in MMCR1 for POWER6 + */ +#define MMCR1_TTM0SEL_SH 60 +#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4) +#define MMCR1_TTMSEL_MSK 0xf +#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK) +#define MMCR1_NESTSEL_SH 45 +#define MMCR1_NESTSEL_MSK 0x7 +#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) +#define MMCR1_PMC1_LLA ((u64)1 << 44) +#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) +#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) +#define MMCR1_PMC1SEL_SH 24 +#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) +#define MMCR1_PMCSEL_MSK 0xff + +/* + * Assign PMC numbers and compute MMCR1 value for a set of events + */ +static int p6_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr1 = 0; + int i; + unsigned int pmc, ev, b, u, s, psel; + unsigned int ttmset = 0; + unsigned int pmc_inuse = 0; + + if (n_ev > 4) + return -1; + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc_inuse & (1 << (pmc - 1))) + return -1; /* collision! */ + pmc_inuse |= 1 << (pmc - 1); + } + } + for (i = 0; i < n_ev; ++i) { + ev = event[i]; + pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + --pmc; + } else { + /* can go on any PMC; find a free one */ + for (pmc = 0; pmc < 4; ++pmc) + if (!(pmc_inuse & (1 << pmc))) + break; + pmc_inuse |= 1 << pmc; + } + hwc[i] = pmc; + psel = ev & PM_PMCSEL_MSK; + if (ev & PM_BUSEVENT_MSK) { + /* this event uses the event bus */ + b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; + u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; + /* check for conflict on this byte of event bus */ + if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) + return -1; + mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); + ttmset |= 1 << b; + if (u == 5) { + /* Nest events have a further mux */ + s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; + if ((ttmset & 0x10) && + MMCR1_NESTSEL(mmcr1) != s) + return -1; + ttmset |= 0x10; + mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; + } + if (0x30 <= psel && psel <= 0x3d) { + /* these need the PMCx_ADDR_SEL bits */ + if (b >= 2) + mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; + } + /* bus select values are different for PMC3/4 */ + if (pmc >= 2 && (psel & 0x90) == 0x80) + psel ^= 0x20; + } + if (ev & PM_LLA) { + mmcr1 |= MMCR1_PMC1_LLA >> pmc; + if (ev & PM_LLAV) + mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; + } + mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); + } + mmcr[0] = 0; + if (pmc_inuse & 1) + mmcr[0] = MMCR0_PMC1CE; + if (pmc_inuse & 0xe) + mmcr[0] |= MMCR0_PMCjCE; + mmcr[1] = mmcr1; + mmcr[2] = 0; + return 0; +} + +/* + * Layout of constraint bits: + * + * 0-1 add field: number of uses of PMC1 (max 1) + * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 + * 8-10 select field: nest (subunit) event selector + * 16-19 select field: unit on byte 0 of event bus + * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 + */ +static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, sh; + unsigned int mask = 0, value = 0; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 4) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + } + if (event & PM_BUSEVENT_MSK) { + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + sh = byte * 4; + mask |= PM_UNIT_MSKS << sh; + value |= (event & PM_UNIT_MSKS) << sh; + if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { + mask |= PM_SUBUNIT_MSKS; + value |= event & PM_SUBUNIT_MSKS; + } + } + *maskp = mask; + *valp = value; + return 0; +} + +#define MAX_ALT 4 /* at most 4 alternatives for any event */ + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ + { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ + { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ + { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */ + { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ + { 0x10000e, 0x400010 }, /* PM_PURR */ + { 0x100010, 0x4000f8 }, /* PM_FLUSH */ + { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */ + { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */ + { 0x100054, 0x2000f0 }, /* PM_ST_FIN */ + { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */ + { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */ + { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */ + { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */ + { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */ + { 0x200012, 0x300012 }, /* PM_INST_DISP */ + { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */ + { 0x2000f8, 0x300010 }, /* PM_EXT_INT */ + { 0x2000fe, 0x300056 }, /* PM_DATA_FROM_L2MISS */ + { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */ + { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */ + { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */ + { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ +}; + +/* + * This could be made more efficient with a binary search on + * a presorted list, if necessary + */ +static int find_alternatives_list(unsigned int event) +{ + int i, j; + unsigned int alt; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + return -1; + for (j = 0; j < MAX_ALT; ++j) { + alt = event_alternatives[i][j]; + if (!alt || event < alt) + break; + if (event == alt) + return i; + } + } + return -1; +} + +static int p6_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j; + unsigned int aevent, psel, pmc; + unsigned int nalt = 1; + + alt[0] = event; + + /* check the alternatives table */ + i = find_alternatives_list(event); + if (i >= 0) { + /* copy out alternatives from list */ + for (j = 0; j < MAX_ALT; ++j) { + aevent = event_alternatives[i][j]; + if (!aevent) + break; + if (aevent != event) + alt[nalt++] = aevent; + } + + } else { + /* Check for alternative ways of computing sum events */ + /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ + psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc && (psel == 0x32 || psel == 0x34)) + alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | + ((5 - pmc) << PM_PMC_SH); + + /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ + if (pmc && (psel == 0x38 || psel == 0x3a)) + alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | + ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); + } + + return nalt; +} + +static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + /* Set PMCxSEL to 0 to disable PMCx */ + mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); +} + +static int power6_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 0x1e, + [PERF_COUNT_INSTRUCTIONS] = 2, + [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ + [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ +}; + +struct power_pmu power6_pmu = { + .n_counter = 4, + .max_alternatives = MAX_ALT, + .add_fields = 0x55, + .test_adder = 0, + .compute_mmcr = p6_compute_mmcr, + .get_constraint = p6_get_constraint, + .get_alternatives = p6_get_alternatives, + .disable_pmc = p6_disable_pmc, + .n_generic = ARRAY_SIZE(power6_generic_events), + .generic_events = power6_generic_events, +}; -- cgit v0.10.2 From dd0e6ba22ea21bcc2c420b385a170593c58f4c08 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Jan 2009 15:11:00 +1100 Subject: perf_counter: Always schedule all software counters in Software counters aren't subject to the limitations imposed by the fixed number of hardware counter registers, so there is no reason not to enable them all in __perf_counter_sched_in. Previously we used to break out of the loop when we got to a group that wouldn't fit on the PMU; with this we continue through the list but only schedule in software counters (or groups containing only software counters) from there on. Signed-off-by: Paul Mackerras diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4c0dccb..3aef306 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -455,12 +455,37 @@ group_error: return -EAGAIN; } +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return !counter->hw_event.raw && counter->hw_event.type < 0; +} + +/* + * Return 1 for a group consisting entirely of software counters, + * 0 if the group contains any hardware counters. + */ +static int is_software_only_group(struct perf_counter *leader) +{ + struct perf_counter *counter; + + if (!is_software_counter(leader)) + return 0; + list_for_each_entry(counter, &leader->sibling_list, list_entry) + if (!is_software_counter(counter)) + return 0; + return 1; +} + static void __perf_counter_sched_in(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx, int cpu) { struct perf_counter *counter; u64 flags; + int can_add_hw = 1; if (likely(!ctx->nr_counters)) return; @@ -477,10 +502,12 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, /* * If we scheduled in a group atomically and exclusively, - * or if this group can't go on, break out: + * or if this group can't go on, don't add any more + * hardware counters. */ - if (group_sched_in(counter, cpuctx, ctx, cpu)) - break; + if (can_add_hw || is_software_only_group(counter)) + if (group_sched_in(counter, cpuctx, ctx, cpu)) + can_add_hw = 0; } hw_perf_restore(flags); spin_unlock(&ctx->lock); -- cgit v0.10.2 From 01d0287f068de2934109ba9b989d8807526cccc2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 Jan 2009 13:44:19 +1100 Subject: powerpc/perf_counter: Make sure PMU gets enabled properly This makes sure that we call the platform-specific ppc_md.enable_pmcs function on each CPU before we try to use the PMU on that CPU. If the CPU goes off-line and then on-line, we need to do the enable_pmcs call again, so we use the hw_perf_counter_setup hook to ensure that. It gets called as each CPU comes online, but it isn't called on the CPU that is coming up, so this adds the CPU number as an argument to it (there were no non-empty instances of hw_perf_counter_setup before). This also arranges to set the pmcregs_in_use field of the lppaca (data structure shared with the hypervisor) on each CPU when we are using the PMU and clear it when we are not. This allows the hypervisor to optimize partition switches by not saving/restoring the PMU registers when we aren't using the PMU. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index df3fe05..85ad259 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -15,6 +15,7 @@ #include #include #include +#include struct cpu_hw_counters { int n_counters; @@ -24,6 +25,7 @@ struct cpu_hw_counters { struct perf_counter *counter[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; u64 mmcr[3]; + u8 pmcs_enabled; }; DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); @@ -262,6 +264,15 @@ u64 hw_perf_save_disable(void) cpuhw->n_added = 0; /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + if (ppc_md.enable_pmcs) + ppc_md.enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + /* * Set the 'freeze counters' bit. * The barrier is to make sure the mtspr has been * executed and the PMU has frozen the counters @@ -305,6 +316,8 @@ void hw_perf_restore(u64 disable) mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + if (cpuhw->n_counters == 0) + get_lppaca()->pmcregs_in_use = 0; goto out; } @@ -323,6 +336,7 @@ void hw_perf_restore(u64 disable) * bit set and set the hardware counters to their initial values. * Then unfreeze the counters. */ + get_lppaca()->pmcregs_in_use = 1; mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) @@ -741,6 +755,14 @@ static void perf_counter_interrupt(struct pt_regs *regs) } } +void hw_perf_counter_setup(int cpu) +{ + struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); + + memset(cpuhw, 0, sizeof(*cpuhw)); + cpuhw->mmcr[0] = MMCR0_FC; +} + extern struct power_pmu ppc970_pmu; extern struct power_pmu power6_pmu; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3aef306..52f2f52 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -46,7 +46,7 @@ hw_perf_counter_init(struct perf_counter *counter) u64 __weak hw_perf_save_disable(void) { return 0; } void __weak hw_perf_restore(u64 ctrl) { barrier(); } -void __weak hw_perf_counter_setup(void) { barrier(); } +void __weak hw_perf_counter_setup(int cpu) { barrier(); } int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu) @@ -1598,7 +1598,7 @@ static void __cpuinit perf_counter_init_cpu(int cpu) cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; mutex_unlock(&perf_resource_mutex); - hw_perf_counter_setup(); + hw_perf_counter_setup(cpu); } #ifdef CONFIG_HOTPLUG_CPU -- cgit v0.10.2 From 3b6f9e5cb21964b7ce12bf81076f830885563ec8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 14 Jan 2009 21:00:30 +1100 Subject: perf_counter: Add support for pinned and exclusive counter groups Impact: New perf_counter features A pinned counter group is one that the user wants to have on the CPU whenever possible, i.e. whenever the associated task is running, for a per-task group, or always for a per-cpu group. If the system cannot satisfy that, it puts the group into an error state where it is not scheduled any more and reads from it return EOF (i.e. 0 bytes read). The group can be released from error state and made readable again using prctl(PR_TASK_PERF_COUNTERS_ENABLE). When we have finer-grained enable/disable controls on counters we'll be able to reset the error state on individual groups. An exclusive group is one that the user wants to be the only group using the CPU performance monitor hardware whenever it is on. The counter group scheduler will not schedule an exclusive group if there are already other groups on the CPU and will not schedule other groups onto the CPU if there is an exclusive group scheduled (that statement does not apply to groups containing only software counters, which can always go on and which do not prevent an exclusive group from going on). With an exclusive group, we will be able to let users program PMU registers at a low level without the concern that those settings will perturb other measurements. Along the way this reorganizes things a little: - is_software_counter() is moved to perf_counter.h. - cpuctx->active_oncpu now records the number of hardware counters on the CPU, i.e. it now excludes software counters. Nothing was reading cpuctx->active_oncpu before, so this change is harmless. - A new cpuctx->exclusive field records whether we currently have an exclusive group on the CPU. - counter_sched_out moves higher up in perf_counter.c and gets called from __perf_counter_remove_from_context and __perf_counter_exit_task, where we used to have essentially the same code. - __perf_counter_sched_in now goes through the counter list twice, doing the pinned counters in the first loop and the non-pinned counters in the second loop, in order to give the pinned counters the best chance to be scheduled in. Note that only a group leader can be exclusive or pinned, and that attribute applies to the whole group. This avoids some awkwardness in some corner cases (e.g. where a group leader is closed and the other group members get added to the context list). If we want to relax that restriction later, we can, and it is easier to relax a restriction than to apply a new one. This doesn't yet handle the case where a pinned counter is inherited and goes into error state in the child - the error state is not propagated up to the parent when the child exits, and arguably it should. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 85ad259..5b02113 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -36,14 +36,6 @@ void perf_counter_print_debug(void) } /* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return !counter->hw_event.raw && counter->hw_event.type < 0; -} - -/* * Read one performance monitor counter (PMC). */ static unsigned long read_pmc(int idx) @@ -443,6 +435,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, */ for (i = n0; i < n0 + n; ++i) cpuhw->counter[i]->hw.config = cpuhw->events[i]; + cpuctx->active_oncpu += n; n = 1; counter_sched_in(group_leader, cpu); list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { @@ -451,7 +444,6 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, ++n; } } - cpuctx->active_oncpu += n; ctx->nr_active += n; return 1; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b21d1ea..7ab8e5f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -86,7 +86,10 @@ struct perf_counter_hw_event { nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ - __reserved_1 : 28; + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only counter on PMU */ + + __reserved_1 : 26; u64 __reserved_2; }; @@ -141,6 +144,7 @@ struct hw_perf_counter_ops { * enum perf_counter_active_state - the states of a counter */ enum perf_counter_active_state { + PERF_COUNTER_STATE_ERROR = -2, PERF_COUNTER_STATE_OFF = -1, PERF_COUNTER_STATE_INACTIVE = 0, PERF_COUNTER_STATE_ACTIVE = 1, @@ -214,6 +218,7 @@ struct perf_cpu_context { struct perf_counter_context *task_ctx; int active_oncpu; int max_pertask; + int exclusive; }; /* @@ -240,6 +245,14 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +/* + * Return 1 for a software counter, 0 for a hardware counter + */ +static inline int is_software_counter(struct perf_counter *counter) +{ + return !counter->hw_event.raw && counter->hw_event.type < 0; +} + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 52f2f52..faf671b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -93,6 +93,25 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) } } +static void +counter_sched_out(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + if (counter->state != PERF_COUNTER_STATE_ACTIVE) + return; + + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->hw_ops->disable(counter); + counter->oncpu = -1; + + if (!is_software_counter(counter)) + cpuctx->active_oncpu--; + ctx->nr_active--; + if (counter->hw_event.exclusive || !cpuctx->active_oncpu) + cpuctx->exclusive = 0; +} + /* * Cross CPU call to remove a performance counter * @@ -118,14 +137,9 @@ static void __perf_counter_remove_from_context(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->hw_ops->disable(counter); - ctx->nr_active--; - cpuctx->active_oncpu--; - counter->task = NULL; - counter->oncpu = -1; - } + counter_sched_out(counter, cpuctx, ctx); + + counter->task = NULL; ctx->nr_counters--; /* @@ -207,7 +221,7 @@ counter_sched_in(struct perf_counter *counter, struct perf_counter_context *ctx, int cpu) { - if (counter->state == PERF_COUNTER_STATE_OFF) + if (counter->state <= PERF_COUNTER_STATE_OFF) return 0; counter->state = PERF_COUNTER_STATE_ACTIVE; @@ -223,13 +237,64 @@ counter_sched_in(struct perf_counter *counter, return -EAGAIN; } - cpuctx->active_oncpu++; + if (!is_software_counter(counter)) + cpuctx->active_oncpu++; ctx->nr_active++; + if (counter->hw_event.exclusive) + cpuctx->exclusive = 1; + return 0; } /* + * Return 1 for a group consisting entirely of software counters, + * 0 if the group contains any hardware counters. + */ +static int is_software_only_group(struct perf_counter *leader) +{ + struct perf_counter *counter; + + if (!is_software_counter(leader)) + return 0; + list_for_each_entry(counter, &leader->sibling_list, list_entry) + if (!is_software_counter(counter)) + return 0; + return 1; +} + +/* + * Work out whether we can put this counter group on the CPU now. + */ +static int group_can_go_on(struct perf_counter *counter, + struct perf_cpu_context *cpuctx, + int can_add_hw) +{ + /* + * Groups consisting entirely of software counters can always go on. + */ + if (is_software_only_group(counter)) + return 1; + /* + * If an exclusive group is already on, no other hardware + * counters can go on. + */ + if (cpuctx->exclusive) + return 0; + /* + * If this group is exclusive and there are already + * counters on the CPU, it can't go on. + */ + if (counter->hw_event.exclusive && cpuctx->active_oncpu) + return 0; + /* + * Otherwise, try to add it if all previous groups were able + * to go on. + */ + return can_add_hw; +} + +/* * Cross CPU call to install and enable a performance counter */ static void __perf_install_in_context(void *info) @@ -240,6 +305,7 @@ static void __perf_install_in_context(void *info) int cpu = smp_processor_id(); unsigned long flags; u64 perf_flags; + int err; /* * If this is a task context, we need to check whether it is @@ -261,9 +327,21 @@ static void __perf_install_in_context(void *info) list_add_counter(counter, ctx); ctx->nr_counters++; - counter_sched_in(counter, cpuctx, ctx, cpu); + /* + * An exclusive counter can't go on if there are already active + * hardware counters, and no hardware counter can go on if there + * is already an exclusive counter on. + */ + if (counter->state == PERF_COUNTER_STATE_INACTIVE && + !group_can_go_on(counter, cpuctx, 1)) + err = -EEXIST; + else + err = counter_sched_in(counter, cpuctx, ctx, cpu); + + if (err && counter->hw_event.pinned) + counter->state = PERF_COUNTER_STATE_ERROR; - if (!ctx->task && cpuctx->max_pertask) + if (!err && !ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; hw_perf_restore(perf_flags); @@ -327,22 +405,6 @@ retry: } static void -counter_sched_out(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->hw_ops->disable(counter); - counter->oncpu = -1; - - cpuctx->active_oncpu--; - ctx->nr_active--; -} - -static void group_sched_out(struct perf_counter *group_counter, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx) @@ -359,6 +421,9 @@ group_sched_out(struct perf_counter *group_counter, */ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) counter_sched_out(counter, cpuctx, ctx); + + if (group_counter->hw_event.exclusive) + cpuctx->exclusive = 0; } void __perf_counter_sched_out(struct perf_counter_context *ctx, @@ -455,30 +520,6 @@ group_error: return -EAGAIN; } -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return !counter->hw_event.raw && counter->hw_event.type < 0; -} - -/* - * Return 1 for a group consisting entirely of software counters, - * 0 if the group contains any hardware counters. - */ -static int is_software_only_group(struct perf_counter *leader) -{ - struct perf_counter *counter; - - if (!is_software_counter(leader)) - return 0; - list_for_each_entry(counter, &leader->sibling_list, list_entry) - if (!is_software_counter(counter)) - return 0; - return 1; -} - static void __perf_counter_sched_in(struct perf_counter_context *ctx, struct perf_cpu_context *cpuctx, int cpu) @@ -492,22 +533,49 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, spin_lock(&ctx->lock); flags = hw_perf_save_disable(); + + /* + * First go through the list and put on any pinned groups + * in order to give them the best chance of going on. + */ + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (counter->state <= PERF_COUNTER_STATE_OFF || + !counter->hw_event.pinned) + continue; + if (counter->cpu != -1 && counter->cpu != cpu) + continue; + + if (group_can_go_on(counter, cpuctx, 1)) + group_sched_in(counter, cpuctx, ctx, cpu); + + /* + * If this pinned group hasn't been scheduled, + * put it in error state. + */ + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + counter->state = PERF_COUNTER_STATE_ERROR; + } + list_for_each_entry(counter, &ctx->counter_list, list_entry) { /* + * Ignore counters in OFF or ERROR state, and + * ignore pinned counters since we did them already. + */ + if (counter->state <= PERF_COUNTER_STATE_OFF || + counter->hw_event.pinned) + continue; + + /* * Listen to the 'cpu' scheduling filter constraint * of counters: */ if (counter->cpu != -1 && counter->cpu != cpu) continue; - /* - * If we scheduled in a group atomically and exclusively, - * or if this group can't go on, don't add any more - * hardware counters. - */ - if (can_add_hw || is_software_only_group(counter)) + if (group_can_go_on(counter, cpuctx, can_add_hw)) { if (group_sched_in(counter, cpuctx, ctx, cpu)) can_add_hw = 0; + } } hw_perf_restore(flags); spin_unlock(&ctx->lock); @@ -567,8 +635,10 @@ int perf_counter_task_disable(void) */ perf_flags = hw_perf_save_disable(); - list_for_each_entry(counter, &ctx->counter_list, list_entry) - counter->state = PERF_COUNTER_STATE_OFF; + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (counter->state != PERF_COUNTER_STATE_ERROR) + counter->state = PERF_COUNTER_STATE_OFF; + } hw_perf_restore(perf_flags); @@ -607,7 +677,7 @@ int perf_counter_task_enable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_OFF) + if (counter->state > PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; counter->hw_event.disabled = 0; @@ -849,6 +919,14 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (count != sizeof(cntval)) return -EINVAL; + /* + * Return end-of-file for a read on a counter that is in + * error state (i.e. because it was pinned but it couldn't be + * scheduled on to the CPU at some point). + */ + if (counter->state == PERF_COUNTER_STATE_ERROR) + return 0; + mutex_lock(&counter->mutex); cntval = perf_counter_read(counter); mutex_unlock(&counter->mutex); @@ -884,7 +962,7 @@ perf_read_irq_data(struct perf_counter *counter, { struct perf_data *irqdata, *usrdata; DECLARE_WAITQUEUE(wait, current); - ssize_t res; + ssize_t res, res2; irqdata = counter->irqdata; usrdata = counter->usrdata; @@ -905,6 +983,9 @@ perf_read_irq_data(struct perf_counter *counter, if (signal_pending(current)) break; + if (counter->state == PERF_COUNTER_STATE_ERROR) + break; + spin_unlock_irq(&counter->waitq.lock); schedule(); spin_lock_irq(&counter->waitq.lock); @@ -913,7 +994,8 @@ perf_read_irq_data(struct perf_counter *counter, __set_current_state(TASK_RUNNING); spin_unlock_irq(&counter->waitq.lock); - if (usrdata->len + irqdata->len < count) + if (usrdata->len + irqdata->len < count && + counter->state != PERF_COUNTER_STATE_ERROR) return -ERESTARTSYS; read_pending: mutex_lock(&counter->mutex); @@ -925,11 +1007,12 @@ read_pending: /* Switch irq buffer: */ usrdata = perf_switch_irq_data(counter); - if (perf_copy_usrdata(usrdata, buf + res, count - res) < 0) { + res2 = perf_copy_usrdata(usrdata, buf + res, count - res); + if (res2 < 0) { if (!res) res = -EFAULT; } else { - res = count; + res += res2; } out: mutex_unlock(&counter->mutex); @@ -1348,6 +1431,11 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, */ if (group_leader->ctx != ctx) goto err_put_context; + /* + * Only a group leader can be exclusive or pinned + */ + if (hw_event.exclusive || hw_event.pinned) + goto err_put_context; } ret = -EINVAL; @@ -1473,13 +1561,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); - if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) { - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - child_counter->hw_ops->disable(child_counter); - cpuctx->active_oncpu--; - child_ctx->nr_active--; - child_counter->oncpu = -1; - } + counter_sched_out(child_counter, cpuctx, child_ctx); list_del_init(&child_counter->list_entry); -- cgit v0.10.2 From d859e29fe34cb833071b20aef860ee94fbad9bb2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 17 Jan 2009 18:10:22 +1100 Subject: perf_counter: Add counter enable/disable ioctls Impact: New perf_counter features This primarily adds a way for perf_counter users to enable and disable counters and groups. Enabling or disabling a counter or group also enables or disables all of the child counters that have been cloned from it to monitor children of the task monitored by the top-level counter. The userspace interface to enable/disable counters is via ioctl on the counter file descriptor. Along the way this extends the code that handles child counters to handle child counter groups properly. A group with multiple counters will be cloned to child tasks if and only if the group leader has the hw_event.inherit bit set - if it is set the whole group is cloned as a group in the child task. In order to be able to enable or disable all child counters of a given top-level counter, we need a way to find them all. Hence I have added a child_list field to struct perf_counter, which is the head of the list of children for a top-level counter, or the link in that list for a child counter. That list is protected by the perf_counter.mutex field. This also adds a mutex to the perf_counter_context struct. Previously the list of counters was protected just by the lock field in the context, which meant that perf_counter_init_task had to take that lock and then take whatever lock/mutex protects the top-level counter's child_list. But the counter enable/disable functions need to take that lock in order to traverse the list, then for each counter take the lock in that counter's context in order to change the counter's state safely, which will lead to a deadlock. To solve this, we now have both a mutex and a spinlock in the context, and taking either is sufficient to ensure the list of counters can't change - you have to take both before changing the list. Now perf_counter_init_task takes the mutex instead of the lock (which incidentally means that inherit_counter can use GFP_KERNEL instead of GFP_ATOMIC) and thus avoids the possible deadlock. Similarly the new enable/disable functions can take the mutex while traversing the list of child counters without incurring a possible deadlock when the counter manipulation code locks the context for a child counter. We also had an misfeature that the first counter added to a context would possibly not go on until the next sched-in, because we were using ctx->nr_active to detect if the context was running on a CPU. But nr_active is the number of active counters, and if that was zero (because the context didn't have any counters yet) it would look like the context wasn't running on a cpu and so the retry code in __perf_install_in_context wouldn't retry. So this adds an 'is_active' field that is set when the context is on a CPU, even if it has no counters. The is_active field is only used for task contexts, not for per-cpu contexts. If we enable a subsidiary counter in a group that is active on a CPU, and the arch code can't enable the counter, then we have to pull the whole group off the CPU. We do this with group_sched_out, which gets moved up in the file so it comes before all its callers. This also adds similar logic to __perf_install_in_context so that the "all on, or none" invariant of groups is preserved when adding a new counter to a group. Signed-off-by: Paul Mackerras diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7ab8e5f..33ba9fe 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -14,6 +14,7 @@ #define _LINUX_PERF_COUNTER_H #include +#include #ifdef CONFIG_PERF_COUNTERS # include @@ -95,6 +96,12 @@ struct perf_counter_hw_event { }; /* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) + +/* * Kernel-internal data types: */ @@ -173,8 +180,10 @@ struct perf_counter { struct file *filp; struct perf_counter *parent; + struct list_head child_list; + /* - * Protect attach/detach: + * Protect attach/detach and child_list: */ struct mutex mutex; @@ -199,13 +208,21 @@ struct perf_counter { struct perf_counter_context { #ifdef CONFIG_PERF_COUNTERS /* - * Protect the list of counters: + * Protect the states of the counters in the list, + * nr_active, and the list: */ spinlock_t lock; + /* + * Protect the list of counters. Locking either mutex or lock + * is sufficient to ensure the list doesn't change; to change + * the list you need to lock both the mutex and the spinlock. + */ + struct mutex mutex; struct list_head counter_list; int nr_counters; int nr_active; + int is_active; struct task_struct *task; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index faf671b..1ac18da 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -112,6 +112,28 @@ counter_sched_out(struct perf_counter *counter, cpuctx->exclusive = 0; } +static void +group_sched_out(struct perf_counter *group_counter, + struct perf_cpu_context *cpuctx, + struct perf_counter_context *ctx) +{ + struct perf_counter *counter; + + if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) + return; + + counter_sched_out(group_counter, cpuctx, ctx); + + /* + * Schedule out siblings (if any): + */ + list_for_each_entry(counter, &group_counter->sibling_list, list_entry) + counter_sched_out(counter, cpuctx, ctx); + + if (group_counter->hw_event.exclusive) + cpuctx->exclusive = 0; +} + /* * Cross CPU call to remove a performance counter * @@ -168,7 +190,7 @@ static void __perf_counter_remove_from_context(void *info) /* * Remove the counter from a task's (or a CPU's) list of counters. * - * Must be called with counter->mutex held. + * Must be called with counter->mutex and ctx->mutex held. * * CPU counters are removed with a smp call. For task counters we only * call when the task is on a CPU. @@ -215,6 +237,99 @@ retry: spin_unlock_irq(&ctx->lock); } +/* + * Cross CPU call to disable a performance counter + */ +static void __perf_counter_disable(void *info) +{ + struct perf_counter *counter = info; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter_context *ctx = counter->ctx; + unsigned long flags; + + /* + * If this is a per-task counter, need to check whether this + * counter's task is the current task on this cpu. + */ + if (ctx->task && cpuctx->task_ctx != ctx) + return; + + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); + + /* + * If the counter is on, turn it off. + * If it is in error state, leave it in error state. + */ + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { + if (counter == counter->group_leader) + group_sched_out(counter, cpuctx, ctx); + else + counter_sched_out(counter, cpuctx, ctx); + counter->state = PERF_COUNTER_STATE_OFF; + } + + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); +} + +/* + * Disable a counter. + */ +static void perf_counter_disable(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Disable the counter on the cpu that it's on + */ + smp_call_function_single(counter->cpu, __perf_counter_disable, + counter, 1); + return; + } + + retry: + task_oncpu_function_call(task, __perf_counter_disable, counter); + + spin_lock_irq(&ctx->lock); + /* + * If the counter is still active, we need to retry the cross-call. + */ + if (counter->state == PERF_COUNTER_STATE_ACTIVE) { + spin_unlock_irq(&ctx->lock); + goto retry; + } + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + counter->state = PERF_COUNTER_STATE_OFF; + + spin_unlock_irq(&ctx->lock); +} + +/* + * Disable a counter and all its children. + */ +static void perf_counter_disable_family(struct perf_counter *counter) +{ + struct perf_counter *child; + + perf_counter_disable(counter); + + /* + * Lock the mutex to protect the list of children + */ + mutex_lock(&counter->mutex); + list_for_each_entry(child, &counter->child_list, child_list) + perf_counter_disable(child); + mutex_unlock(&counter->mutex); +} + static int counter_sched_in(struct perf_counter *counter, struct perf_cpu_context *cpuctx, @@ -302,6 +417,7 @@ static void __perf_install_in_context(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; + struct perf_counter *leader = counter->group_leader; int cpu = smp_processor_id(); unsigned long flags; u64 perf_flags; @@ -328,22 +444,39 @@ static void __perf_install_in_context(void *info) ctx->nr_counters++; /* + * Don't put the counter on if it is disabled or if + * it is in a group and the group isn't on. + */ + if (counter->state != PERF_COUNTER_STATE_INACTIVE || + (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) + goto unlock; + + /* * An exclusive counter can't go on if there are already active * hardware counters, and no hardware counter can go on if there * is already an exclusive counter on. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE && - !group_can_go_on(counter, cpuctx, 1)) + if (!group_can_go_on(counter, cpuctx, 1)) err = -EEXIST; else err = counter_sched_in(counter, cpuctx, ctx, cpu); - if (err && counter->hw_event.pinned) - counter->state = PERF_COUNTER_STATE_ERROR; + if (err) { + /* + * This counter couldn't go on. If it is in a group + * then we have to pull the whole group off. + * If the counter group is pinned then put it in error state. + */ + if (leader != counter) + group_sched_out(leader, cpuctx, ctx); + if (leader->hw_event.pinned) + leader->state = PERF_COUNTER_STATE_ERROR; + } if (!err && !ctx->task && cpuctx->max_pertask) cpuctx->max_pertask--; + unlock: hw_perf_restore(perf_flags); spin_unlock(&ctx->lock); @@ -359,6 +492,8 @@ static void __perf_install_in_context(void *info) * If the counter is attached to a task which is on a CPU we use a smp * call to enable it in the task context. The task might have been * scheduled away, but we check this in the smp call again. + * + * Must be called with ctx->mutex held. */ static void perf_install_in_context(struct perf_counter_context *ctx, @@ -387,7 +522,7 @@ retry: /* * we need to retry the smp call. */ - if (ctx->nr_active && list_empty(&counter->list_entry)) { + if (ctx->is_active && list_empty(&counter->list_entry)) { spin_unlock_irq(&ctx->lock); goto retry; } @@ -404,26 +539,131 @@ retry: spin_unlock_irq(&ctx->lock); } -static void -group_sched_out(struct perf_counter *group_counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) +/* + * Cross CPU call to enable a performance counter + */ +static void __perf_counter_enable(void *info) { - struct perf_counter *counter; + struct perf_counter *counter = info; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_counter_context *ctx = counter->ctx; + struct perf_counter *leader = counter->group_leader; + unsigned long flags; + int err; - if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) + /* + * If this is a per-task counter, need to check whether this + * counter's task is the current task on this cpu. + */ + if (ctx->task && cpuctx->task_ctx != ctx) return; - counter_sched_out(group_counter, cpuctx, ctx); + curr_rq_lock_irq_save(&flags); + spin_lock(&ctx->lock); + + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + goto unlock; + counter->state = PERF_COUNTER_STATE_INACTIVE; /* - * Schedule out siblings (if any): + * If the counter is in a group and isn't the group leader, + * then don't put it on unless the group is on. */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) - counter_sched_out(counter, cpuctx, ctx); + if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) + goto unlock; - if (group_counter->hw_event.exclusive) - cpuctx->exclusive = 0; + if (!group_can_go_on(counter, cpuctx, 1)) + err = -EEXIST; + else + err = counter_sched_in(counter, cpuctx, ctx, + smp_processor_id()); + + if (err) { + /* + * If this counter can't go on and it's part of a + * group, then the whole group has to come off. + */ + if (leader != counter) + group_sched_out(leader, cpuctx, ctx); + if (leader->hw_event.pinned) + leader->state = PERF_COUNTER_STATE_ERROR; + } + + unlock: + spin_unlock(&ctx->lock); + curr_rq_unlock_irq_restore(&flags); +} + +/* + * Enable a counter. + */ +static void perf_counter_enable(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + struct task_struct *task = ctx->task; + + if (!task) { + /* + * Enable the counter on the cpu that it's on + */ + smp_call_function_single(counter->cpu, __perf_counter_enable, + counter, 1); + return; + } + + spin_lock_irq(&ctx->lock); + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + goto out; + + /* + * If the counter is in error state, clear that first. + * That way, if we see the counter in error state below, we + * know that it has gone back into error state, as distinct + * from the task having been scheduled away before the + * cross-call arrived. + */ + if (counter->state == PERF_COUNTER_STATE_ERROR) + counter->state = PERF_COUNTER_STATE_OFF; + + retry: + spin_unlock_irq(&ctx->lock); + task_oncpu_function_call(task, __perf_counter_enable, counter); + + spin_lock_irq(&ctx->lock); + + /* + * If the context is active and the counter is still off, + * we need to retry the cross-call. + */ + if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) + goto retry; + + /* + * Since we have the lock this context can't be scheduled + * in, so we can change the state safely. + */ + if (counter->state == PERF_COUNTER_STATE_OFF) + counter->state = PERF_COUNTER_STATE_INACTIVE; + out: + spin_unlock_irq(&ctx->lock); +} + +/* + * Enable a counter and all its children. + */ +static void perf_counter_enable_family(struct perf_counter *counter) +{ + struct perf_counter *child; + + perf_counter_enable(counter); + + /* + * Lock the mutex to protect the list of children + */ + mutex_lock(&counter->mutex); + list_for_each_entry(child, &counter->child_list, child_list) + perf_counter_enable(child); + mutex_unlock(&counter->mutex); } void __perf_counter_sched_out(struct perf_counter_context *ctx, @@ -432,16 +672,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, struct perf_counter *counter; u64 flags; + spin_lock(&ctx->lock); + ctx->is_active = 0; if (likely(!ctx->nr_counters)) - return; + goto out; - spin_lock(&ctx->lock); flags = hw_perf_save_disable(); if (ctx->nr_active) { list_for_each_entry(counter, &ctx->counter_list, list_entry) group_sched_out(counter, cpuctx, ctx); } hw_perf_restore(flags); + out: spin_unlock(&ctx->lock); } @@ -528,10 +770,11 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, u64 flags; int can_add_hw = 1; + spin_lock(&ctx->lock); + ctx->is_active = 1; if (likely(!ctx->nr_counters)) - return; + goto out; - spin_lock(&ctx->lock); flags = hw_perf_save_disable(); /* @@ -578,6 +821,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, } } hw_perf_restore(flags); + out: spin_unlock(&ctx->lock); } @@ -896,12 +1140,14 @@ static int perf_release(struct inode *inode, struct file *file) file->private_data = NULL; + mutex_lock(&ctx->mutex); mutex_lock(&counter->mutex); perf_counter_remove_from_context(counter); put_context(ctx); mutex_unlock(&counter->mutex); + mutex_unlock(&ctx->mutex); kfree(counter); @@ -1053,10 +1299,30 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) return events; } +static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct perf_counter *counter = file->private_data; + int err = 0; + + switch (cmd) { + case PERF_COUNTER_IOC_ENABLE: + perf_counter_enable_family(counter); + break; + case PERF_COUNTER_IOC_DISABLE: + perf_counter_disable_family(counter); + break; + default: + err = -ENOTTY; + } + return err; +} + static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, .poll = perf_poll, + .unlocked_ioctl = perf_ioctl, + .compat_ioctl = perf_ioctl, }; static int cpu_clock_perf_counter_enable(struct perf_counter *counter) @@ -1348,6 +1614,8 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); + INIT_LIST_HEAD(&counter->child_list); + counter->irqdata = &counter->data[0]; counter->usrdata = &counter->data[1]; counter->cpu = cpu; @@ -1452,7 +1720,9 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, goto err_free_put_context; counter->filp = counter_file; + mutex_lock(&ctx->mutex); perf_install_in_context(ctx, counter, cpu); + mutex_unlock(&ctx->mutex); fput_light(counter_file, fput_needed2); @@ -1479,6 +1749,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx, { memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->lock); + mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->counter_list); ctx->task = task; } @@ -1486,20 +1757,30 @@ __perf_counter_init_context(struct perf_counter_context *ctx, /* * inherit a counter from parent task to child task: */ -static int +static struct perf_counter * inherit_counter(struct perf_counter *parent_counter, struct task_struct *parent, struct perf_counter_context *parent_ctx, struct task_struct *child, + struct perf_counter *group_leader, struct perf_counter_context *child_ctx) { struct perf_counter *child_counter; + /* + * Instead of creating recursive hierarchies of counters, + * we link inherited counters back to the original parent, + * which has a filp for sure, which we use as the reference + * count: + */ + if (parent_counter->parent) + parent_counter = parent_counter->parent; + child_counter = perf_counter_alloc(&parent_counter->hw_event, - parent_counter->cpu, NULL, - GFP_ATOMIC); + parent_counter->cpu, group_leader, + GFP_KERNEL); if (!child_counter) - return -ENOMEM; + return NULL; /* * Link it up in the child's context: @@ -1523,16 +1804,82 @@ inherit_counter(struct perf_counter *parent_counter, */ atomic_long_inc(&parent_counter->filp->f_count); + /* + * Link this into the parent counter's child list + */ + mutex_lock(&parent_counter->mutex); + list_add_tail(&child_counter->child_list, &parent_counter->child_list); + + /* + * Make the child state follow the state of the parent counter, + * not its hw_event.disabled bit. We hold the parent's mutex, + * so we won't race with perf_counter_{en,dis}able_family. + */ + if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) + child_counter->state = PERF_COUNTER_STATE_INACTIVE; + else + child_counter->state = PERF_COUNTER_STATE_OFF; + + mutex_unlock(&parent_counter->mutex); + + return child_counter; +} + +static int inherit_group(struct perf_counter *parent_counter, + struct task_struct *parent, + struct perf_counter_context *parent_ctx, + struct task_struct *child, + struct perf_counter_context *child_ctx) +{ + struct perf_counter *leader; + struct perf_counter *sub; + + leader = inherit_counter(parent_counter, parent, parent_ctx, + child, NULL, child_ctx); + if (!leader) + return -ENOMEM; + list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { + if (!inherit_counter(sub, parent, parent_ctx, + child, leader, child_ctx)) + return -ENOMEM; + } return 0; } +static void sync_child_counter(struct perf_counter *child_counter, + struct perf_counter *parent_counter) +{ + u64 parent_val, child_val; + + parent_val = atomic64_read(&parent_counter->count); + child_val = atomic64_read(&child_counter->count); + + /* + * Add back the child's count to the parent's count: + */ + atomic64_add(child_val, &parent_counter->count); + + /* + * Remove this counter from the parent's list + */ + mutex_lock(&parent_counter->mutex); + list_del_init(&child_counter->child_list); + mutex_unlock(&parent_counter->mutex); + + /* + * Release the parent counter, if this was the last + * reference to it. + */ + fput(parent_counter->filp); +} + static void __perf_counter_exit_task(struct task_struct *child, struct perf_counter *child_counter, struct perf_counter_context *child_ctx) { struct perf_counter *parent_counter; - u64 parent_val, child_val; + struct perf_counter *sub, *tmp; /* * If we do not self-reap then we have to wait for the @@ -1561,7 +1908,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); - counter_sched_out(child_counter, cpuctx, child_ctx); + group_sched_out(child_counter, cpuctx, child_ctx); list_del_init(&child_counter->list_entry); @@ -1577,26 +1924,23 @@ __perf_counter_exit_task(struct task_struct *child, * that are still around due to the child reference. These * counters need to be zapped - but otherwise linger. */ - if (!parent_counter) - return; - - parent_val = atomic64_read(&parent_counter->count); - child_val = atomic64_read(&child_counter->count); - - /* - * Add back the child's count to the parent's count: - */ - atomic64_add(child_val, &parent_counter->count); - - fput(parent_counter->filp); + if (parent_counter) { + sync_child_counter(child_counter, parent_counter); + list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list, + list_entry) { + if (sub->parent) + sync_child_counter(sub, sub->parent); + kfree(sub); + } + } kfree(child_counter); } /* - * When a child task exist, feed back counter values to parent counters. + * When a child task exits, feed back counter values to parent counters. * - * Note: we are running in child context, but the PID is not hashed + * Note: we may be running in child context, but the PID is not hashed * anymore so new counters will not be added. */ void perf_counter_exit_task(struct task_struct *child) @@ -1620,9 +1964,8 @@ void perf_counter_exit_task(struct task_struct *child) void perf_counter_init_task(struct task_struct *child) { struct perf_counter_context *child_ctx, *parent_ctx; - struct perf_counter *counter, *parent_counter; + struct perf_counter *counter; struct task_struct *parent = current; - unsigned long flags; child_ctx = &child->perf_counter_ctx; parent_ctx = &parent->perf_counter_ctx; @@ -1641,32 +1984,22 @@ void perf_counter_init_task(struct task_struct *child) * Lock the parent list. No need to lock the child - not PID * hashed yet and not running, so nobody can access it. */ - spin_lock_irqsave(&parent_ctx->lock, flags); + mutex_lock(&parent_ctx->mutex); /* * We dont have to disable NMIs - we are only looking at * the list, not manipulating it: */ list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { - if (!counter->hw_event.inherit || counter->group_leader != counter) + if (!counter->hw_event.inherit) continue; - /* - * Instead of creating recursive hierarchies of counters, - * we link inheritd counters back to the original parent, - * which has a filp for sure, which we use as the reference - * count: - */ - parent_counter = counter; - if (counter->parent) - parent_counter = counter->parent; - - if (inherit_counter(parent_counter, parent, + if (inherit_group(counter, parent, parent_ctx, child, child_ctx)) break; } - spin_unlock_irqrestore(&parent_ctx->lock, flags); + mutex_unlock(&parent_ctx->mutex); } static void __cpuinit perf_counter_init_cpu(int cpu) @@ -1692,11 +2025,15 @@ static void __perf_counter_exit_cpu(void *info) list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) __perf_counter_remove_from_context(counter); - } static void perf_counter_exit_cpu(int cpu) { + struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct perf_counter_context *ctx = &cpuctx->ctx; + + mutex_lock(&ctx->mutex); smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); + mutex_unlock(&ctx->mutex); } #else static inline void perf_counter_exit_cpu(int cpu) { } -- cgit v0.10.2 From 1b023a96d9b44f50f4d8ff28c15f5b80e354760f Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 23 Jan 2009 10:13:01 +0100 Subject: perfcounters: throttle on too high IRQ rates Starting kerneltop with only -c 100 seems to be a bad idea, it can easily lock the system due to perfcounter IRQ overload. So add throttling: if a new IRQ arrives in a shorter than PERFMON_MIN_PERIOD_NS time, turn off perfcounters and untrottle them from the next timer tick. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 7b434e5..849c230 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -781,6 +781,8 @@ static void local_apic_timer_interrupt(void) inc_irq_stat(apic_timer_irqs); evt->event_handler(evt); + + perf_counter_unthrottle(); } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 9376771..1a040b1 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -33,6 +33,9 @@ static int nr_counters_fixed __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 last_interrupt; + u64 global_enable; + int throttled; }; /* @@ -474,16 +477,19 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) { int bit, cpu = smp_processor_id(); - u64 ack, status, saved_global; - struct cpu_hw_counters *cpuc; + u64 ack, status, now; + struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); /* Disable counters globally */ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); ack_APIC_irq(); - cpuc = &per_cpu(cpu_hw_counters, cpu); + now = sched_clock(); + if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS) + cpuc->throttled = 1; + cpuc->last_interrupt = now; rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); if (!status) @@ -533,9 +539,29 @@ again: goto again; out: /* - * Restore - do not reenable when global enable is off: + * Restore - do not reenable when global enable is off or throttled: */ - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, saved_global); + if (!cpuc->throttled) + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); +} + +void perf_counter_unthrottle(void) +{ + struct cpu_hw_counters *cpuc; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return; + + if (unlikely(!perf_counters_initialized)) + return; + + cpuc = &per_cpu(cpu_hw_counters, smp_processor_id()); + if (cpuc->throttled) { + if (printk_ratelimit()) + printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n"); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); + cpuc->throttled = 0; + } } void smp_perf_counter_interrupt(struct pt_regs *regs) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 33ba9fe..91f1ca4 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -254,6 +254,7 @@ extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); extern void perf_counter_notify(struct pt_regs *regs); extern void perf_counter_print_debug(void); +extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); extern void hw_perf_restore(u64 ctrl); extern int perf_counter_task_disable(void); @@ -270,6 +271,8 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } +#define PERFMON_MIN_PERIOD_NS 10000 + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -281,6 +284,7 @@ static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } +static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } -- cgit v0.10.2 From 4b39fd96855254a244f71245b41a91cdecb87d63 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 23 Jan 2009 14:36:16 +0100 Subject: perfcounters: ratelimit performance counter interrupts Ratelimit performance counter interrupts to 100KHz per CPU. This replaces the irq-delta-time based method. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 1a040b1..a56d4cf 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -33,9 +33,8 @@ static int nr_counters_fixed __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - u64 last_interrupt; + unsigned long interrupts; u64 global_enable; - int throttled; }; /* @@ -471,13 +470,18 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) } /* + * Maximum interrupt frequency of 100KHz per CPU + */ +#define PERFMON_MAX_INTERRUPTS 100000/HZ + +/* * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: */ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) { int bit, cpu = smp_processor_id(); - u64 ack, status, now; + u64 ack, status; struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); @@ -486,11 +490,6 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); ack_APIC_irq(); - now = sched_clock(); - if (now - cpuc->last_interrupt < PERFMON_MIN_PERIOD_NS) - cpuc->throttled = 1; - cpuc->last_interrupt = now; - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); if (!status) goto out; @@ -541,13 +540,14 @@ out: /* * Restore - do not reenable when global enable is off or throttled: */ - if (!cpuc->throttled) + if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); } void perf_counter_unthrottle(void) { struct cpu_hw_counters *cpuc; + u64 global_enable; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return; @@ -556,12 +556,15 @@ void perf_counter_unthrottle(void) return; cpuc = &per_cpu(cpu_hw_counters, smp_processor_id()); - if (cpuc->throttled) { + if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { if (printk_ratelimit()) - printk(KERN_WARNING "PERFMON: max event frequency exceeded!\n"); + printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n"); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); - cpuc->throttled = 0; } + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable); + if (unlikely(cpuc->global_enable && !global_enable)) + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); + cpuc->interrupts = 0; } void smp_perf_counter_interrupt(struct pt_regs *regs) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 91f1ca4..f55381f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -271,8 +271,6 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } -#define PERFMON_MIN_PERIOD_NS 10000 - #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -- cgit v0.10.2 From 3415dd9146c574bffe8f012c096bfc2bc62b9508 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 23 Jan 2009 14:16:53 +0100 Subject: perfcounters fix section mismatch warning in perf_counter.c::perf_counters_lapic_init() Fix: WARNING: arch/x86/kernel/built-in.o(.text+0xdd0f): Section mismatch in reference from the function pmc_generic_enable() to the function .cpuinit.text:perf_counters_lapic_init() The function pmc_generic_enable() references the function __cpuinit perf_counters_lapic_init(). This is often because pmc_generic_enable lacks a __cpuinit annotation or the annotation of perf_counters_lapic_init is wrong. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a56d4cf..46c436c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -605,7 +605,7 @@ void perf_counter_notify(struct pt_regs *regs) local_irq_restore(flags); } -void __cpuinit perf_counters_lapic_init(int nmi) +void perf_counters_lapic_init(int nmi) { u32 apic_val; -- cgit v0.10.2 From bb3f0b59ad005d2d2ecbbe9bd048eab6d1ecbd31 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 25 Jan 2009 02:38:09 -0800 Subject: x86: make irqinit_32.c more like irqinit_64.c, v2 Impact: cleanup 1. add smp_intr_init and apic_intr_init for 32bit, the same as 64bit 2. move the apic_intr_init() call before set gate with interrupt[i] 3. for 64bit, if ia32_emulation is not used, will make per_cpu to use 0x80 vector. [ v2: should use !test_bit() instead of test_bit() with 32bit ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index c56496f..ddf3eb7 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -120,28 +120,8 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -void __init native_init_IRQ(void) +static void __init smp_intr_init(void) { - int i; - - /* all the set up before the call gates are initialised */ - pre_intr_init_hook(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); - } - - #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper @@ -170,8 +150,13 @@ void __init native_init_IRQ(void) set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif +} +static void __init apic_intr_init(void) +{ #ifdef CONFIG_X86_LOCAL_APIC + smp_intr_init(); + /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -181,12 +166,37 @@ void __init native_init_IRQ(void) # ifdef CONFIG_PERF_COUNTERS alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); # endif -#endif -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) +# ifdef CONFIG_X86_MCE_P4THERMAL /* thermal monitor LVT interrupt */ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +# endif #endif +} + +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +void __init native_init_IRQ(void) +{ + int i; + + /* all the set up before the call gates are initialised */ + pre_intr_init_hook(); + + apic_intr_init(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (!test_bit(vector, used_vectors)) + set_intr_gate(vector, interrupt[i]); + } if (!acpi_ioapic) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 6a71bfc..16e1fc6 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -162,6 +162,9 @@ void __init native_init_IRQ(void) int i; init_ISA_irqs(); + + apic_intr_init(); + /* * Cover the whole vector space, no vector can escape * us. (some of these will be overridden and become @@ -169,12 +172,10 @@ void __init native_init_IRQ(void) */ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) + if (!test_bit(vector, used_vectors)) set_intr_gate(vector, interrupt[i]); } - apic_intr_init(); - if (!acpi_ioapic) setup_irq(2, &irq2); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ed5aee5..d36a502 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -979,8 +979,13 @@ void __init trap_init(void) #endif set_intr_gate(19, &simd_coprocessor_error); + /* Reserve all the builtin and the syscall vector: */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) + set_bit(i, used_vectors); + #ifdef CONFIG_IA32_EMULATION set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); + set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_32 @@ -997,17 +1002,9 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); -#endif - - /* Reserve all the builtin and the syscall vector: */ - for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) - set_bit(i, used_vectors); - -#ifdef CONFIG_X86_64 - set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else set_bit(SYSCALL_VECTOR, used_vectors); #endif + /* * Should be a barrier for any external CPU state: */ -- cgit v0.10.2 From 65d370862f64973611a271ced61864b5f9bb6fc0 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 29 Jan 2009 14:06:52 +0100 Subject: perfcounters: fix refcounting bug don't kfree in use counters. Running... while true; do perfstat -e 1 -c true; done ...on all cores for a while doesn't seem to be eating ram, and my oops is gone. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1ac18da..f27a7e9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1934,7 +1934,8 @@ __perf_counter_exit_task(struct task_struct *child, } } - kfree(child_counter); + if (!child_counter->filp || !atomic_long_read(&child_counter->filp->f_count)) + kfree(child_counter); } /* -- cgit v0.10.2 From 15081c61362618a0c81cc8d04e45e7427bc1ed71 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 1 Feb 2009 22:07:39 +0530 Subject: x86: irqinit_32.c fix compilation warning Fix: arch/x86/kernel/irqinit_32.c:124: warning: 'smp_intr_init' defined but not used Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index ddf3eb7..520e6c1 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -154,9 +154,9 @@ static void __init smp_intr_init(void) static void __init apic_intr_init(void) { -#ifdef CONFIG_X86_LOCAL_APIC smp_intr_init(); +#ifdef CONFIG_X86_LOCAL_APIC /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); -- cgit v0.10.2 From 5b75af0a02fcf3b8899f38ff6f22164c5d8e2fdd Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 4 Feb 2009 17:11:34 +0100 Subject: perfcounters: fix "perf counters kill oprofile" bug With oprofile as a module, and unloaded by profiling script, both oprofile and kerneltop work fine.. unless you leave kerneltop running when you start profiling, then you may see badness. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 46c436c..8bb2133 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -643,7 +643,9 @@ perf_counter_nmi_handler(struct notifier_block *self, } static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler + .notifier_call = perf_counter_nmi_handler, + .next = NULL, + .priority = 1 }; void __init init_hw_perf_counters(void) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864a..c638685 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self, switch (val) { case DIE_NMI: - if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) - ret = NOTIFY_STOP; + case DIE_NMI_IPI: + model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); + ret = NOTIFY_STOP; break; default: break; @@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy) static struct notifier_block profile_exceptions_nb = { .notifier_call = profile_exceptions_notify, .next = NULL, - .priority = 0 + .priority = 2 }; static int nmi_setup(void) -- cgit v0.10.2 From 82aa9a1829199233f9bdaf26e2ee271114f4701e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Feb 2009 15:23:08 +0100 Subject: perfcounters: fix "perf counters kills oprofile" bug, v2 Impact: fix kernel crash Both oprofile and perfcounters register an NMI die handler, but only one can handle the NMI. Conveniently, oprofile unregisters it's notifier when not actively in use, so setting it's notifier priority higher than perfcounter's allows oprofile to borrow the NMI for the duration of it's run. Tested/works both as module and built-in. While testing, I found that if kerneltop was generating NMIs at very high frequency, the kernel may panic when oprofile registered it's handler. This turned out to be because oprofile registers it's handler before reset_value has been allocated, so if an NMI comes in while it's still setting up, kabOom. Rather than try more invasive changes, I followed the lead of other places in op_model_ppro.c, and simply returned in that highly unlikely event. (debug warnings attached) Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 07c9145..85eb626 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -126,6 +126,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, u64 val; int i; + /* + * This can happen if perf counters are in use when + * we steal the die notifier NMI. + */ + if (unlikely(!reset_value)) + goto out; + for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; @@ -136,6 +143,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, } } +out: /* Only P6 based Pentium M need to re-unmask the apic vector but it * doesn't hurt other P6 variant */ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); -- cgit v0.10.2 From 23a185ca8abbeef64b6ffc33059b1d630e43ec10 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 9 Feb 2009 22:42:47 +1100 Subject: perf_counters: make software counters work as per-cpu counters Impact: kernel crash fix Yanmin Zhang reported that using a PERF_COUNT_TASK_CLOCK software counter as a per-cpu counter would reliably crash the system, because it calls __task_delta_exec with a null pointer. The page fault, context switch and cpu migration counters also won't function correctly as per-cpu counters since they reference the current task. This fixes the problem by redirecting the task_clock counter to the cpu_clock counter when used as a per-cpu counter, and by implementing per-cpu page fault, context switch and cpu migration counters. Along the way, this: - Initializes counter->ctx earlier, in perf_counter_alloc, so that sw_perf_counter_init can use it - Adds code to kernel/sched.c to count task migrations into each cpu, in rq->nr_migrations_in - Exports the per-cpu context switch and task migration counts via new functions added to kernel/sched.c - Makes sure that if sw_perf_counter_init fails, we don't try to initialize the counter as a hardware counter. Since the user has passed a negative, non-raw event type, they clearly don't intend for it to be interpreted as a hardware event. Reported-by: "Zhang Yanmin" Signed-off-by: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index b85b10a..1e5f700 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -137,6 +137,8 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); +extern u64 cpu_nr_switches(int cpu); +extern u64 cpu_nr_migrations(int cpu); struct seq_file; struct cfs_rq; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f27a7e9..544193c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include /* * Each CPU has a list of per CPU counters: @@ -502,7 +504,6 @@ perf_install_in_context(struct perf_counter_context *ctx, { struct task_struct *task = ctx->task; - counter->ctx = ctx; if (!task) { /* * Per cpu counters are installed via an smp call and @@ -1417,11 +1418,19 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .read = task_clock_perf_counter_read, }; -static u64 get_page_faults(void) +#ifdef CONFIG_VM_EVENT_COUNTERS +#define cpu_page_faults() __get_cpu_var(vm_event_states).event[PGFAULT] +#else +#define cpu_page_faults() 0 +#endif + +static u64 get_page_faults(struct perf_counter *counter) { - struct task_struct *curr = current; + struct task_struct *curr = counter->ctx->task; - return curr->maj_flt + curr->min_flt; + if (curr) + return curr->maj_flt + curr->min_flt; + return cpu_page_faults(); } static void page_faults_perf_counter_update(struct perf_counter *counter) @@ -1430,7 +1439,7 @@ static void page_faults_perf_counter_update(struct perf_counter *counter) s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = get_page_faults(); + now = get_page_faults(counter); atomic64_set(&counter->hw.prev_count, now); @@ -1446,11 +1455,7 @@ static void page_faults_perf_counter_read(struct perf_counter *counter) static int page_faults_perf_counter_enable(struct perf_counter *counter) { - /* - * page-faults is a per-task value already, - * so we dont have to clear it on switch-in. - */ - + atomic64_set(&counter->hw.prev_count, get_page_faults(counter)); return 0; } @@ -1465,11 +1470,13 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = { .read = page_faults_perf_counter_read, }; -static u64 get_context_switches(void) +static u64 get_context_switches(struct perf_counter *counter) { - struct task_struct *curr = current; + struct task_struct *curr = counter->ctx->task; - return curr->nvcsw + curr->nivcsw; + if (curr) + return curr->nvcsw + curr->nivcsw; + return cpu_nr_switches(smp_processor_id()); } static void context_switches_perf_counter_update(struct perf_counter *counter) @@ -1478,7 +1485,7 @@ static void context_switches_perf_counter_update(struct perf_counter *counter) s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = get_context_switches(); + now = get_context_switches(counter); atomic64_set(&counter->hw.prev_count, now); @@ -1494,11 +1501,7 @@ static void context_switches_perf_counter_read(struct perf_counter *counter) static int context_switches_perf_counter_enable(struct perf_counter *counter) { - /* - * ->nvcsw + curr->nivcsw is a per-task value already, - * so we dont have to clear it on switch-in. - */ - + atomic64_set(&counter->hw.prev_count, get_context_switches(counter)); return 0; } @@ -1513,9 +1516,13 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = { .read = context_switches_perf_counter_read, }; -static inline u64 get_cpu_migrations(void) +static inline u64 get_cpu_migrations(struct perf_counter *counter) { - return current->se.nr_migrations; + struct task_struct *curr = counter->ctx->task; + + if (curr) + return curr->se.nr_migrations; + return cpu_nr_migrations(smp_processor_id()); } static void cpu_migrations_perf_counter_update(struct perf_counter *counter) @@ -1524,7 +1531,7 @@ static void cpu_migrations_perf_counter_update(struct perf_counter *counter) s64 delta; prev = atomic64_read(&counter->hw.prev_count); - now = get_cpu_migrations(); + now = get_cpu_migrations(counter); atomic64_set(&counter->hw.prev_count, now); @@ -1540,11 +1547,7 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter) static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) { - /* - * se.nr_migrations is a per-task value already, - * so we dont have to clear it on switch-in. - */ - + atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter)); return 0; } @@ -1569,7 +1572,14 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_TASK_CLOCK: - hw_ops = &perf_ops_task_clock; + /* + * If the user instantiates this as a per-cpu counter, + * use the cpu_clock counter instead. + */ + if (counter->ctx->task) + hw_ops = &perf_ops_task_clock; + else + hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_PAGE_FAULTS: hw_ops = &perf_ops_page_faults; @@ -1592,6 +1602,7 @@ sw_perf_counter_init(struct perf_counter *counter) static struct perf_counter * perf_counter_alloc(struct perf_counter_hw_event *hw_event, int cpu, + struct perf_counter_context *ctx, struct perf_counter *group_leader, gfp_t gfpflags) { @@ -1623,6 +1634,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->wakeup_pending = 0; counter->group_leader = group_leader; counter->hw_ops = NULL; + counter->ctx = ctx; counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) @@ -1631,7 +1643,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, hw_ops = NULL; if (!hw_event->raw && hw_event->type < 0) hw_ops = sw_perf_counter_init(counter); - if (!hw_ops) + else hw_ops = hw_perf_counter_init(counter); if (!hw_ops) { @@ -1707,7 +1719,8 @@ sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, } ret = -EINVAL; - counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL); + counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader, + GFP_KERNEL); if (!counter) goto err_put_context; @@ -1777,15 +1790,14 @@ inherit_counter(struct perf_counter *parent_counter, parent_counter = parent_counter->parent; child_counter = perf_counter_alloc(&parent_counter->hw_event, - parent_counter->cpu, group_leader, - GFP_KERNEL); + parent_counter->cpu, child_ctx, + group_leader, GFP_KERNEL); if (!child_counter) return NULL; /* * Link it up in the child's context: */ - child_counter->ctx = child_ctx; child_counter->task = child; list_add_counter(child_counter, child_ctx); child_ctx->nr_counters++; diff --git a/kernel/sched.c b/kernel/sched.c index 8db1a4c..173768f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -558,6 +558,7 @@ struct rq { struct load_weight load; unsigned long nr_load_updates; u64 nr_switches; + u64 nr_migrations_in; struct cfs_rq cfs; struct rt_rq rt; @@ -1908,6 +1909,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #endif if (old_cpu != new_cpu) { p->se.nr_migrations++; + new_rq->nr_migrations_in++; #ifdef CONFIG_SCHEDSTATS if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); @@ -2811,6 +2813,21 @@ unsigned long nr_active(void) } /* + * Externally visible per-cpu scheduler statistics: + * cpu_nr_switches(cpu) - number of context switches on that cpu + * cpu_nr_migrations(cpu) - number of migrations into that cpu + */ +u64 cpu_nr_switches(int cpu) +{ + return cpu_rq(cpu)->nr_switches; +} + +u64 cpu_nr_migrations(int cpu) +{ + return cpu_rq(cpu)->nr_migrations_in; +} + +/* * Update rq->cpu_load[] statistics. This function is usually called every * scheduler tick (TICK_NSEC). */ -- cgit v0.10.2 From d278c48435625cb6b7edcf6a547620768b175709 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 9 Feb 2009 07:38:50 +0100 Subject: perf_counters: account NMI interrupts I noticed that kerneltop interrupts were accounted as NMI, but not their perf counter origin. Account NMI performance counter interrupts. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar arch/x86/kernel/cpu/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 8bb2133..9901e46 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -495,6 +495,7 @@ static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) goto out; again: + inc_irq_stat(apic_perf_irqs); ack = status; for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_counter *counter = cpuc->counters[bit]; @@ -570,7 +571,6 @@ void perf_counter_unthrottle(void) void smp_perf_counter_interrupt(struct pt_regs *regs) { irq_enter(); - inc_irq_stat(apic_perf_irqs); apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); __smp_perf_counter_interrupt(regs, 0); -- cgit v0.10.2 From 0475f9ea8e2cc030298908949e0d5da9f2fc2cfe Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 11 Feb 2009 14:35:35 +1100 Subject: perf_counters: allow users to count user, kernel and/or hypervisor events Impact: new perf_counter feature This extends the perf_counter_hw_event struct with bits that specify that events in user, kernel and/or hypervisor mode should not be counted (i.e. should be excluded), and adds code to program the PMU mode selection bits accordingly on x86 and powerpc. For software counters, we don't currently have the infrastructure to distinguish which mode an event occurs in, so we currently fail the counter initialization if the setting of the hw_event.exclude_* bits would require us to distinguish. Context switches and CPU migrations are currently considered to occur in kernel mode. On x86, this changes the previous policy that only root can count kernel events. Now non-root users can count kernel events or exclude them. Non-root users still can't use NMI events, though. On x86 we don't appear to have any way to control whether hypervisor events are counted or not, so hw_event.exclude_hv is ignored. On powerpc, the selection of whether to count events in user, kernel and/or hypervisor mode is PMU-wide, not per-counter, so this adds a check that the hw_event.exclude_* settings are the same as other events on the PMU. Counters being added to a group have to have the same settings as the other hardware counters in the group. Counters and groups can only be enabled in hw_perf_group_sched_in or power_perf_enable if they have the same settings as any other counters already on the PMU. If we are not running on a hypervisor, the exclude_hv setting is ignored (by forcing it to 0) since we can't ever get any hypervisor events. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5b02113..bd6ba85 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -16,6 +16,7 @@ #include #include #include +#include struct cpu_hw_counters { int n_counters; @@ -214,6 +215,36 @@ static int power_check_constraints(unsigned int event[], int n_ev) return 0; } +/* + * Check if newly-added counters have consistent settings for + * exclude_{user,kernel,hv} with each other and any previously + * added counters. + */ +static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) +{ + int eu, ek, eh; + int i, n; + struct perf_counter *counter; + + n = n_prev + n_new; + if (n <= 1) + return 0; + + eu = ctrs[0]->hw_event.exclude_user; + ek = ctrs[0]->hw_event.exclude_kernel; + eh = ctrs[0]->hw_event.exclude_hv; + if (n_prev == 0) + n_prev = 1; + for (i = n_prev; i < n; ++i) { + counter = ctrs[i]; + if (counter->hw_event.exclude_user != eu || + counter->hw_event.exclude_kernel != ek || + counter->hw_event.exclude_hv != eh) + return -EAGAIN; + } + return 0; +} + static void power_perf_read(struct perf_counter *counter) { long val, delta, prev; @@ -324,6 +355,20 @@ void hw_perf_restore(u64 disable) } /* + * Add in MMCR0 freeze bits corresponding to the + * hw_event.exclude_* bits for the first counter. + * We have already checked that all counters have the + * same values for these bits as the first counter. + */ + counter = cpuhw->counter[0]; + if (counter->hw_event.exclude_user) + cpuhw->mmcr[0] |= MMCR0_FCP; + if (counter->hw_event.exclude_kernel) + cpuhw->mmcr[0] |= MMCR0_FCS; + if (counter->hw_event.exclude_hv) + cpuhw->mmcr[0] |= MMCR0_FCHV; + + /* * Write the new configuration to MMCR* with the freeze * bit set and set the hardware counters to their initial values. * Then unfreeze the counters. @@ -424,6 +469,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, &cpuhw->counter[n0], &cpuhw->events[n0]); if (n < 0) return -EAGAIN; + if (check_excludes(cpuhw->counter, n0, n)) + return -EAGAIN; if (power_check_constraints(cpuhw->events, n + n0)) return -EAGAIN; cpuhw->n_counters = n0 + n; @@ -476,6 +523,8 @@ static int power_perf_enable(struct perf_counter *counter) goto out; cpuhw->counter[n0] = counter; cpuhw->events[n0] = counter->hw.config; + if (check_excludes(cpuhw->counter, n0, 1)) + goto out; if (power_check_constraints(cpuhw->events, n0 + 1)) goto out; @@ -555,6 +604,17 @@ hw_perf_counter_init(struct perf_counter *counter) counter->hw.idx = 0; /* + * If we are not running on a hypervisor, force the + * exclude_hv bit to 0 so that we don't care what + * the user set it to. This also means that we don't + * set the MMCR0_FCHV bit, which unconditionally freezes + * the counters on the PPC970 variants used in Apple G5 + * machines (since MSR.HV is always 1 on those machines). + */ + if (!firmware_has_feature(FW_FEATURE_LPAR)) + counter->hw_event.exclude_hv = 0; + + /* * If this is in a group, check if it can go on with all the * other hardware counters in the group. We assume the counter * hasn't been linked into its leader's sibling list at this point. @@ -566,11 +626,13 @@ hw_perf_counter_init(struct perf_counter *counter) if (n < 0) return NULL; } - events[n++] = ev; - if (power_check_constraints(events, n)) + events[n] = ev; + if (check_excludes(ctrs, n, 1)) + return NULL; + if (power_check_constraints(events, n + 1)) return NULL; - counter->hw.config = events[n - 1]; + counter->hw.config = events[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); return &power_perf_ops; } diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 9901e46..383d4c6 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -107,21 +107,25 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return -EINVAL; /* - * Count user events, and generate PMC IRQs: + * Generate PMC IRQs: * (keep 'enabled' bit clear for now) */ - hwc->config = ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_INT; + hwc->config = ARCH_PERFMON_EVENTSEL_INT; /* - * If privileged enough, count OS events too, and allow - * NMI events as well: + * Count user and OS events unless requested not to. */ - hwc->nmi = 0; - if (capable(CAP_SYS_ADMIN)) { + if (!hw_event->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!hw_event->exclude_kernel) hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - if (hw_event->nmi) - hwc->nmi = 1; - } + + /* + * If privileged enough, allow NMI events: + */ + hwc->nmi = 0; + if (capable(CAP_SYS_ADMIN) && hw_event->nmi) + hwc->nmi = 1; hwc->irq_period = hw_event->irq_period; /* @@ -248,10 +252,13 @@ __pmc_fixed_enable(struct perf_counter *counter, int err; /* - * Enable IRQ generation (0x8) and ring-3 counting (0x2), - * and enable ring-0 counting if allowed: + * Enable IRQ generation (0x8), + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: */ - bits = 0x8ULL | 0x2ULL; + bits = 0x8ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) bits |= 0x1; bits <<= (idx * 4); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f55381f..c83f51d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -83,14 +83,17 @@ struct perf_counter_hw_event { u64 irq_period; u32 record_type; - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only counter on PMU */ - - __reserved_1 : 26; + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + + __reserved_1 : 23; u64 __reserved_2; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 544193c..89d5e3f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1567,11 +1567,25 @@ sw_perf_counter_init(struct perf_counter *counter) { const struct hw_perf_counter_ops *hw_ops = NULL; + /* + * Software counters (currently) can't in general distinguish + * between user, kernel and hypervisor events. + * However, context switches and cpu migrations are considered + * to be kernel events, and page faults are never hypervisor + * events. + */ switch (counter->hw_event.type) { case PERF_COUNT_CPU_CLOCK: - hw_ops = &perf_ops_cpu_clock; + if (!(counter->hw_event.exclude_user || + counter->hw_event.exclude_kernel || + counter->hw_event.exclude_hv)) + hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_TASK_CLOCK: + if (counter->hw_event.exclude_user || + counter->hw_event.exclude_kernel || + counter->hw_event.exclude_hv) + break; /* * If the user instantiates this as a per-cpu counter, * use the cpu_clock counter instead. @@ -1582,13 +1596,17 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_PAGE_FAULTS: - hw_ops = &perf_ops_page_faults; + if (!(counter->hw_event.exclude_user || + counter->hw_event.exclude_kernel)) + hw_ops = &perf_ops_page_faults; break; case PERF_COUNT_CONTEXT_SWITCHES: - hw_ops = &perf_ops_context_switches; + if (!counter->hw_event.exclude_kernel) + hw_ops = &perf_ops_context_switches; break; case PERF_COUNT_CPU_MIGRATIONS: - hw_ops = &perf_ops_cpu_migrations; + if (!counter->hw_event.exclude_kernel) + hw_ops = &perf_ops_cpu_migrations; break; default: break; -- cgit v0.10.2 From 5af759176cc767e7426f89764bde4996ebaaf419 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 11 Feb 2009 10:53:37 +0100 Subject: perfcounters: fix use after free in perf_release() running... while true; do foo -d 1 -f 1 -c 100000 & sleep 1 kerneltop -d 1 -f 1 -e 1 -c 25000 -p `pidof foo` done while true; do killall foo; killall kerneltop; sleep 2 done ...in two shells with SLUB_DEBUG enabled produces flood of: BUG task_struct: Poison overwritten. Fix the use-after-free bug in perf_release(). Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 89d5e3f..e0576c3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1145,12 +1145,12 @@ static int perf_release(struct inode *inode, struct file *file) mutex_lock(&counter->mutex); perf_counter_remove_from_context(counter); - put_context(ctx); mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); kfree(counter); + put_context(ctx); return 0; } -- cgit v0.10.2 From 4bcf349a0f90d1e69eb35c6df0fa285c886c1cd6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 11 Feb 2009 13:53:19 +0100 Subject: perfcounters: fix refcounting bug, take 2 Only free child_counter if it has a parent; if it doesn't, then it has a file pointing to it and we'll free it in perf_release. Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e0576c3..fcefb0a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1958,14 +1958,13 @@ __perf_counter_exit_task(struct task_struct *child, sync_child_counter(child_counter, parent_counter); list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list, list_entry) { - if (sub->parent) + if (sub->parent) { sync_child_counter(sub, sub->parent); - kfree(sub); + kfree(sub); + } } - } - - if (!child_counter->filp || !atomic_long_read(&child_counter->filp->f_count)) kfree(child_counter); + } } /* -- cgit v0.10.2 From c07c99b67233ccaad38a961c17405dc1e1542aa4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 13 Feb 2009 22:10:34 +1100 Subject: perfcounters: make context switch and migration software counters work again Jaswinder Singh Rajput reported that commit 23a185ca8abbeef caused the context switch and migration software counters to report zero always. With that commit, the software counters only count events that occur between sched-in and sched-out for a task. This is necessary for the counter enable/disable prctls and ioctls to work. However, the context switch and migration counts are incremented after sched-out for one task and before sched-in for the next. Since the increment doesn't occur while a task is scheduled in (as far as the software counters are concerned) it doesn't count towards any counter. Thus the context switch and migration counters need to count events that occur at any time, provided the counter is enabled, not just those that occur while the task is scheduled in (from the perf_counter subsystem's point of view). The problem though is that the software counter code can't tell the difference between being enabled and being scheduled in, and between being disabled and being scheduled out, since we use the one pair of enable/disable entry points for both. That is, the high-level disable operation simply arranges for the counter to not be scheduled in any more, and the high-level enable operation arranges for it to be scheduled in again. One way to solve this would be to have sched_in/out operations in the hw_perf_counter_ops struct as well as enable/disable. However, this takes a simpler approach: it adds a 'prev_state' field to the perf_counter struct that allows a counter's enable method to know whether the counter was previously disabled or just inactive (scheduled out), and therefore whether the enable method is being called as a result of a high-level enable or a schedule-in operation. This then allows the context switch, migration and page fault counters to reset their hw.prev_count value in their enable functions only if they are called as a result of a high-level enable operation. Although page faults would normally only occur while the counter is scheduled in, this changes the page fault counter code too in case there are ever circumstances where page faults get counted against a task while its counters are not scheduled in. Reported-by: Jaswinder Singh Rajput Signed-off-by: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c83f51d..32cd1ac 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -173,6 +173,7 @@ struct perf_counter { const struct hw_perf_counter_ops *hw_ops; enum perf_counter_active_state state; + enum perf_counter_active_state prev_state; atomic64_t count; struct perf_counter_hw_event hw_event; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index fcefb0a..ad62965 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -444,6 +444,7 @@ static void __perf_install_in_context(void *info) list_add_counter(counter, ctx); ctx->nr_counters++; + counter->prev_state = PERF_COUNTER_STATE_OFF; /* * Don't put the counter on if it is disabled or if @@ -562,6 +563,7 @@ static void __perf_counter_enable(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); + counter->prev_state = counter->state; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; @@ -733,6 +735,7 @@ group_sched_in(struct perf_counter *group_counter, if (ret) return ret < 0 ? ret : 0; + group_counter->prev_state = group_counter->state; if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) return -EAGAIN; @@ -740,6 +743,7 @@ group_sched_in(struct perf_counter *group_counter, * Schedule in siblings as one group (if any): */ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { + counter->prev_state = counter->state; if (counter_sched_in(counter, cpuctx, ctx, cpu)) { partial_group = counter; goto group_error; @@ -1398,9 +1402,9 @@ static void task_clock_perf_counter_read(struct perf_counter *counter) static int task_clock_perf_counter_enable(struct perf_counter *counter) { - u64 now = task_clock_perf_counter_val(counter, 0); - - atomic64_set(&counter->hw.prev_count, now); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, + task_clock_perf_counter_val(counter, 0)); return 0; } @@ -1455,7 +1459,8 @@ static void page_faults_perf_counter_read(struct perf_counter *counter) static int page_faults_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, get_page_faults(counter)); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, get_page_faults(counter)); return 0; } @@ -1501,7 +1506,9 @@ static void context_switches_perf_counter_read(struct perf_counter *counter) static int context_switches_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, get_context_switches(counter)); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, + get_context_switches(counter)); return 0; } @@ -1547,7 +1554,9 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter) static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) { - atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter)); + if (counter->prev_state <= PERF_COUNTER_STATE_OFF) + atomic64_set(&counter->hw.prev_count, + get_cpu_migrations(counter)); return 0; } -- cgit v0.10.2 From 73ca2f8380311115723c7afe811f3ed1f0ba945e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 16 Feb 2009 01:08:17 +0100 Subject: perfcounters: remove duplicate definition of LOCAL_PERF_VECTOR Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index b66b518..b07278c 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -112,11 +112,6 @@ #define LOCAL_PERF_VECTOR 0xee /* - * Performance monitoring interrupt vector: - */ -#define LOCAL_PERF_VECTOR 0xee - -/* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) -- cgit v0.10.2 From 37a25424252b6cff4dd4b1937ab6a1dbfcadabcc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 16 Feb 2009 15:32:23 +0100 Subject: perfcounters: fix acpi_idle_do_entry() workaround Fix merge error in drivers/acpi/processor_idle.c. This resulted in non-working perfcounters on certain Nehalem systems. Signed-off-by: Ingo Molnar diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 259f6e8..08def2f 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -826,12 +826,9 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) { u64 perf_flags; - u64 pctrl; - /* Don't trace irqs off for idle */ stop_critical_timings(); perf_flags = hw_perf_save_disable(); - pctrl = hw_perf_save_disable(); if (cx->entry_method == ACPI_CSTATE_FFH) { /* Call into architectural FFH based C-state */ acpi_processor_ffh_cstate_enter(cx); @@ -847,7 +844,6 @@ static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) unused = inl(acpi_gbl_FADT.xpm_timer_block.address); } hw_perf_restore(perf_flags); - hw_perf_restore(pctrl); start_critical_timings(); } -- cgit v0.10.2 From d095cd46dac104e4d2a4967c7c19b55a12f78240 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Feb 2009 23:01:28 +1100 Subject: perfcounters/powerpc: Make exclude_kernel bit work on Apple G5 processors Currently, setting hw_event.exclude_kernel does nothing on the PPC970 variants used in Apple G5 machines, because they have the HV (hypervisor) bit in the MSR forced to 1, so as far as the PMU is concerned, the kernel runs in hypervisor mode. Thus we have to use the MMCR0_FCHV (freeze counters in hypervisor mode) bit rather than the MMCR0_FCS (freeze counters in supervisor mode) bit. This checks the MSR.HV bit at startup, and if it is set, we set the freeze_counters_kernel variable to MMCR0_FCHV (it was initialized to MMCR0_FCS). We then use that whenever we need to exclude kernel events. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bd6ba85..6e27913 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -32,6 +32,15 @@ DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); struct power_pmu *ppmu; +/* + * Normally, to ignore kernel events we set the FCS (freeze counters + * in supervisor mode) bit in MMCR0, but if the kernel runs with the + * hypervisor bit set in the MSR, or if we are running on a processor + * where the hypervisor bit is forced to 1 (as on Apple G5 processors), + * then we need to use the FCHV bit to ignore kernel events. + */ +static unsigned int freeze_counters_kernel = MMCR0_FCS; + void perf_counter_print_debug(void) { } @@ -364,7 +373,7 @@ void hw_perf_restore(u64 disable) if (counter->hw_event.exclude_user) cpuhw->mmcr[0] |= MMCR0_FCP; if (counter->hw_event.exclude_kernel) - cpuhw->mmcr[0] |= MMCR0_FCS; + cpuhw->mmcr[0] |= freeze_counters_kernel; if (counter->hw_event.exclude_hv) cpuhw->mmcr[0] |= MMCR0_FCHV; @@ -606,10 +615,7 @@ hw_perf_counter_init(struct perf_counter *counter) /* * If we are not running on a hypervisor, force the * exclude_hv bit to 0 so that we don't care what - * the user set it to. This also means that we don't - * set the MMCR0_FCHV bit, which unconditionally freezes - * the counters on the PPC970 variants used in Apple G5 - * machines (since MSR.HV is always 1 on those machines). + * the user set it to. */ if (!firmware_has_feature(FW_FEATURE_LPAR)) counter->hw_event.exclude_hv = 0; @@ -841,6 +847,13 @@ static int init_perf_counters(void) ppmu = &power6_pmu; break; } + + /* + * Use FCHV to ignore kernel events if MSR.HV is set. + */ + if (mfmsr() & MSR_HV) + freeze_counters_kernel = MMCR0_FCHV; + return 0; } -- cgit v0.10.2 From 742bd95ba96e19b3f7196c3a0834ebc17c8ba006 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 24 Feb 2009 11:33:56 +1100 Subject: perfcounters/powerpc: Add support for POWER5 processors This adds the back-end for the PMU on the POWER5 processor. This knows how to use the fixed-function PMC5 and PMC6 (instructions completed and run cycles). Unlike POWER6, PMC5/6 obey the freeze conditions and can generate interrupts, so their use doesn't impose any extra restrictions. POWER5+ is different and is not supported by this patch. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 7c941ec..b4c6f46 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,8 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power6-pmu.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power5-pmu.o \ + power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 6e27913..112332d 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -824,6 +824,7 @@ void hw_perf_counter_setup(int cpu) } extern struct power_pmu ppc970_pmu; +extern struct power_pmu power5_pmu; extern struct power_pmu power6_pmu; static int init_perf_counters(void) @@ -843,6 +844,9 @@ static int init_perf_counters(void) case PV_970MP: ppmu = &ppc970_pmu; break; + case PV_POWER5: + ppmu = &power5_pmu; + break; case 0x3e: ppmu = &power6_pmu; break; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c new file mode 100644 index 0000000..379ed10 --- /dev/null +++ b/arch/powerpc/kernel/power5-pmu.c @@ -0,0 +1,475 @@ +/* + * Performance counter support for POWER5 (not POWER5++) processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER5 (not POWER5++) + */ +#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) +#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_BYTE_SH 12 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 7 +#define PM_GRS_SH 8 /* Storage subsystem mux select */ +#define PM_GRS_MSK 7 +#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ +#define PM_PMCSEL_MSK 0x7f + +/* Values in PM_UNIT field */ +#define PM_FPU 0 +#define PM_ISU0 1 +#define PM_IFU 2 +#define PM_ISU1 3 +#define PM_IDU 4 +#define PM_ISU0_ALT 6 +#define PM_GRS 7 +#define PM_LSU0 8 +#define PM_LSU1 0xc +#define PM_LASTUNIT 0xc + +/* + * Bits in MMCR1 for POWER5 + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTM1SEL_SH 60 +#define MMCR1_TTM2SEL_SH 58 +#define MMCR1_TTM3SEL_SH 56 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 54 +#define MMCR1_TD_CP_DBG1SEL_SH 52 +#define MMCR1_TD_CP_DBG2SEL_SH 50 +#define MMCR1_TD_CP_DBG3SEL_SH 48 +#define MMCR1_GRS_L2SEL_SH 46 +#define MMCR1_GRS_L2SEL_MSK 3 +#define MMCR1_GRS_L3SEL_SH 44 +#define MMCR1_GRS_L3SEL_MSK 3 +#define MMCR1_GRS_MCSEL_SH 41 +#define MMCR1_GRS_MCSEL_MSK 7 +#define MMCR1_GRS_FABSEL_SH 39 +#define MMCR1_GRS_FABSEL_MSK 3 +#define MMCR1_PMC1_ADDER_SEL_SH 35 +#define MMCR1_PMC2_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC1SEL_SH 25 +#define MMCR1_PMC2SEL_SH 17 +#define MMCR1_PMC3SEL_SH 9 +#define MMCR1_PMC4SEL_SH 1 +#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) +#define MMCR1_PMCSEL_MSK 0x7f + +/* + * Bits in MMCRA + */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> + * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 + * + * T0 - TTM0 constraint + * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 + * + * T1 - TTM1 constraint + * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 + * + * NC - number of counters + * 51: NC error 0x0008_0000_0000_0000 + * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 + * + * G0..G3 - GRS mux constraints + * 46-47: GRS_L2SEL value + * 44-45: GRS_L3SEL value + * 41-44: GRS_MCSEL value + * 39-40: GRS_FABSEL value + * Note that these match up with their bit positions in MMCR1 + * + * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS + * 37: UC3 error 0x20_0000_0000 + * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 + * 35: ISU0 events needed 0x08_0000_0000 + * 34: IDU|GRS events needed 0x04_0000_0000 + * + * PS1 + * 33: PS1 error 0x2_0000_0000 + * 31-32: count of events needing PMC1/2 0x1_8000_0000 + * + * PS2 + * 30: PS2 error 0x4000_0000 + * 28-29: count of events needing PMC3/4 0x3000_0000 + * + * B0 + * 24-27: Byte 0 event source 0x0f00_0000 + * Encoding as for the event code + * + * B1, B2, B3 + * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources + * + * P1..P6 + * 0-11: Count of events needing PMC1..PMC6 + */ + +static const int grsel_shift[8] = { + MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, + MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, + MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH +}; + +/* Masks and values for using events from the various units */ +static u64 unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, + [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, + [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, + [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, + [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, + [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, +}; + +static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, sh; + int bit, fmask; + u64 mask = 0, value = 0; + int grp = -1; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 6) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + if (pmc <= 4) + grp = (pmc - 1) >> 1; + else if (event != 0x500009 && event != 0x600005) + return -1; + } + if (event & PM_BUSEVENT_MSK) { + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + mask |= unit_cons[unit][0]; + value |= unit_cons[unit][1]; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ + ++unit; + byte &= 3; + } + if (unit == PM_GRS) { + bit = event & 7; + fmask = (bit == 6)? 7: 3; + sh = grsel_shift[bit]; + mask |= (u64)fmask << sh; + value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + } + /* + * Bus events on bytes 0 and 2 can be counted + * on PMC1/2; bytes 1 and 3 on PMC3/4. + */ + if (!pmc) + grp = byte & 1; + /* Set byte lane select field */ + mask |= 0xfULL << (24 - 4 * byte); + value |= (u64)unit << (24 - 4 * byte); + } + if (grp == 0) { + /* increment PMC1/2 field */ + mask |= 0x200000000ull; + value |= 0x080000000ull; + } else if (grp == 1) { + /* increment PMC3/4 field */ + mask |= 0x40000000ull; + value |= 0x10000000ull; + } + if (pmc < 5) { + /* need a counter from PMC1-4 set */ + mask |= 0x8000000000000ull; + value |= 0x1000000000000ull; + } + *maskp = mask; + *valp = value; + return 0; +} + +#define MAX_ALT 3 /* at most 3 alternatives for any event */ + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ + { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ + { 0x100005, 0x600005 }, /* PM_RUN_CYC */ + { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */ + { 0x300009, 0x400009 }, /* PM_INST_DISP */ +}; + +/* + * Scan the alternatives table for a match and return the + * index into the alternatives table if found, else -1. + */ +static int find_alternative(unsigned int event) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + break; + for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) + if (event == event_alternatives[i][j]) + return i; + } + return -1; +} + +static const unsigned char bytedecode_alternatives[4][4] = { + /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, + /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, + /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, + /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } +}; + +/* + * Some direct events for decodes of event bus byte 3 have alternative + * PMCSEL values on other counters. This returns the alternative + * event code for those that do, or -1 otherwise. + */ +static int find_alternative_bdecode(unsigned int event) +{ + int pmc, altpmc, pp, j; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc == 0 || pmc > 4) + return -1; + altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ + pp = event & PM_PMCSEL_MSK; + for (j = 0; j < 4; ++j) { + if (bytedecode_alternatives[pmc - 1][j] == pp) { + return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | + (altpmc << PM_PMC_SH) | + bytedecode_alternatives[altpmc - 1][j]; + } + } + return -1; +} + +static int power5_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j, ae, nalt = 1; + + alt[0] = event; + nalt = 1; + i = find_alternative(event); + if (i >= 0) { + for (j = 0; j < MAX_ALT; ++j) { + ae = event_alternatives[i][j]; + if (ae && ae != event) + alt[nalt++] = ae; + } + } else { + ae = find_alternative_bdecode(event); + if (ae > 0) + alt[nalt++] = ae; + } + return nalt; +} + +static int power5_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr1 = 0; + unsigned int pmc, unit, byte, psel; + unsigned int ttm, grp; + int i, isbus, bit, grsel; + unsigned int pmc_inuse = 0; + unsigned int pmc_grp_use[2]; + unsigned char busbyte[4]; + unsigned char unituse[16]; + int ttmuse; + + if (n_ev > 6) + return -1; + + /* First pass to count resource use */ + pmc_grp_use[0] = pmc_grp_use[1] = 0; + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 6) + return -1; + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + /* count 1/2 vs 3/4 use */ + if (pmc <= 4) + ++pmc_grp_use[(pmc - 1) >> 1]; + } + if (event[i] & PM_BUSEVENT_MSK) { + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + ++unit; + byte &= 3; + } + if (!pmc) + ++pmc_grp_use[byte & 1]; + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + unituse[unit] = 1; + } + } + if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) + return -1; + + /* + * Assign resources and set multiplexer selects. + * + * PM_ISU0 can go either on TTM0 or TTM1, but that's the only + * choice we have to deal with. + */ + if (unituse[PM_ISU0] & + (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { + unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ + unituse[PM_ISU0] = 0; + } + /* Set TTM[01]SEL fields. */ + ttmuse = 0; + for (i = PM_FPU; i <= PM_ISU1; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + } + ttmuse = 0; + for (; i <= PM_GRS; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + } + if (ttmuse > 1) + return -1; + + /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { + /* get ISU0 through TTM1 rather than TTM0 */ + unit = PM_ISU0_ALT; + } else if (unit == PM_LSU1 + 1) { + /* select lower word of LSU1 for this byte */ + mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + } + ttm = unit >> 2; + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + isbus = event[i] & PM_BUSEVENT_MSK; + if (!pmc) { + /* Bus event or any-PMC direct event */ + for (pmc = 0; pmc < 4; ++pmc) { + if (pmc_inuse & (1 << pmc)) + continue; + grp = (pmc >> 1) & 1; + if (isbus) { + if (grp == (byte & 1)) + break; + } else if (pmc_grp_use[grp] < 2) { + ++pmc_grp_use[grp]; + break; + } + } + pmc_inuse |= 1 << pmc; + } else if (pmc <= 4) { + /* Direct event */ + --pmc; + if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + } else { + /* Instructions or run cycles on PMC5/6 */ + --pmc; + } + if (isbus && unit == PM_GRS) { + bit = psel & 7; + grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; + mmcr1 |= (u64)grsel << grsel_shift[bit]; + } + if (pmc <= 3) + mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); + hwc[i] = pmc; + } + + /* Return MMCRx values */ + mmcr[0] = 0; + if (pmc_inuse & 1) + mmcr[0] = MMCR0_PMC1CE; + if (pmc_inuse & 0x3e) + mmcr[0] |= MMCR0_PMCjCE; + mmcr[1] = mmcr1; + mmcr[2] = 0; + return 0; +} + +static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + if (pmc <= 3) + mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); +} + +static int power5_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 0xf, + [PERF_COUNT_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ +}; + +struct power_pmu power5_pmu = { + .n_counter = 6, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000090000555ull, + .test_adder = 0x3000490000000ull, + .compute_mmcr = power5_compute_mmcr, + .get_constraint = power5_get_constraint, + .get_alternatives = power5_get_alternatives, + .disable_pmc = power5_disable_pmc, + .n_generic = ARRAY_SIZE(power5_generic_events), + .generic_events = power5_generic_events, +}; -- cgit v0.10.2 From f3dfd2656deb81a0addee4f4ceff66b50a387388 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Feb 2009 22:43:46 +1100 Subject: perfcounters: fix a few minor cleanliness issues This fixes three issues noticed by Arnd Bergmann: - Add #ifdef __KERNEL__ and move some things around in perf_counter.h to make sure only the bits that userspace needs are exported to userspace. - Use __u64, __s64, __u32 types in the structs exported to userspace rather than u64, s64, u32. - Make the sys_perf_counter_open syscall available to the SPUs on Cell platforms. And one issue that I noticed in looking at the code again: - Wrap the perf_counter_open syscall with SYSCALL_DEFINE4 so we get the proper handling of int arguments on ppc64 (and some other 64-bit architectures). Reported-by: Arnd Bergmann Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 4c8095f..d312eec 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -322,4 +322,4 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) -SYSCALL(perf_counter_open) +SYSCALL_SPU(perf_counter_open) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 32cd1ac..186efaf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -13,20 +13,8 @@ #ifndef _LINUX_PERF_COUNTER_H #define _LINUX_PERF_COUNTER_H -#include -#include - -#ifdef CONFIG_PERF_COUNTERS -# include -#endif - -#include -#include -#include -#include -#include - -struct task_struct; +#include +#include /* * User-space ABI bits: @@ -78,12 +66,12 @@ enum perf_counter_record_type { * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - s64 type; + __s64 type; - u64 irq_period; - u32 record_type; + __u64 irq_period; + __u32 record_type; - u32 disabled : 1, /* off by default */ + __u32 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ @@ -95,7 +83,7 @@ struct perf_counter_hw_event { __reserved_1 : 23; - u64 __reserved_2; + __u64 __reserved_2; }; /* @@ -104,10 +92,24 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +#ifdef __KERNEL__ /* - * Kernel-internal data types: + * Kernel-internal data types and definitions: */ +#ifdef CONFIG_PERF_COUNTERS +# include +#endif + +#include +#include +#include +#include +#include +#include + +struct task_struct; + /** * struct hw_perf_counter - performance counter hardware details: */ @@ -293,4 +295,5 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } #endif +#endif /* __KERNEL__ */ #endif /* _LINUX_PERF_COUNTER_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 88255d32..28ef2be 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -696,10 +696,7 @@ asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd); +asmlinkage long sys_perf_counter_open( + const struct perf_counter_hw_event __user *hw_event_uptr, + pid_t pid, int cpu, int group_fd); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ad62965..16b14ba 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1690,9 +1690,9 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, * @cpu: target cpu * @group_fd: group leader counter fd */ -asmlinkage int -sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, int cpu, int group_fd) +SYSCALL_DEFINE4(perf_counter_open, + const struct perf_counter_hw_event __user *, hw_event_uptr, + pid_t, pid, int, cpu, int, group_fd) { struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; -- cgit v0.10.2 From b56a3802dc6df29aa27d2c12edf420258091ad66 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 27 Feb 2009 18:09:09 +0530 Subject: x86: prepare perf_counter to add more cpus Introduced struct pmc_x86_ops to add more cpus. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 383d4c6..a3c8852 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -3,6 +3,7 @@ * * Copyright(C) 2008 Thomas Gleixner * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar + * Copyright(C) 2009 Jaswinder Singh Rajput * * For licencing details see kernel-base/COPYING */ @@ -38,10 +39,24 @@ struct cpu_hw_counters { }; /* - * Intel PerfMon v3. Used on Core2 and later. + * struct pmc_x86_ops - performance counter x86 ops */ +struct pmc_x86_ops { + u64 (*save_disable_all) (void); + void (*restore_all) (u64 ctrl); + unsigned eventsel; + unsigned perfctr; + int (*event_map) (int event); + int max_events; +}; + +static struct pmc_x86_ops *pmc_ops; + static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); +/* + * Intel PerfMon v3. Used on Core2 and later. + */ static const int intel_perfmon_event_map[] = { [PERF_COUNT_CPU_CYCLES] = 0x003c, @@ -53,7 +68,10 @@ static const int intel_perfmon_event_map[] = [PERF_COUNT_BUS_CYCLES] = 0x013c, }; -static const int max_intel_perfmon_events = ARRAY_SIZE(intel_perfmon_event_map); +static int pmc_intel_event_map(int event) +{ + return intel_perfmon_event_map[event]; +} /* * Propagate counter elapsed time into the generic counter. @@ -144,38 +162,48 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (hw_event->raw) { hwc->config |= hw_event->type; } else { - if (hw_event->type >= max_intel_perfmon_events) + if (hw_event->type >= pmc_ops->max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= intel_perfmon_event_map[hw_event->type]; + hwc->config |= pmc_ops->event_map(hw_event->type); } counter->wakeup_pending = 0; return 0; } -u64 hw_perf_save_disable(void) +static u64 pmc_intel_save_disable_all(void) { u64 ctrl; - if (unlikely(!perf_counters_initialized)) - return 0; - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); return ctrl; } + +u64 hw_perf_save_disable(void) +{ + if (unlikely(!perf_counters_initialized)) + return 0; + + return pmc_ops->save_disable_all(); +} EXPORT_SYMBOL_GPL(hw_perf_save_disable); +static void pmc_intel_restore_all(u64 ctrl) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); +} + void hw_perf_restore(u64 ctrl) { if (unlikely(!perf_counters_initialized)) return; - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + pmc_ops->restore_all(ctrl); } EXPORT_SYMBOL_GPL(hw_perf_restore); @@ -291,11 +319,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_INSTRUCTIONS])) + if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_CPU_CYCLES])) + if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES))) return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(event == intel_perfmon_event_map[PERF_COUNT_BUS_CYCLES])) + if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES))) return X86_PMC_IDX_FIXED_BUS_CYCLES; return -1; @@ -339,8 +367,8 @@ try_generic: set_bit(idx, cpuc->used); hwc->idx = idx; } - hwc->config_base = MSR_ARCH_PERFMON_EVENTSEL0; - hwc->counter_base = MSR_ARCH_PERFMON_PERFCTR0; + hwc->config_base = pmc_ops->eventsel; + hwc->counter_base = pmc_ops->perfctr; } perf_counters_lapic_init(hwc->nmi); @@ -386,8 +414,8 @@ void perf_counter_print_debug(void) printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { - rdmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, pmc_ctrl); - rdmsrl(MSR_ARCH_PERFMON_PERFCTR0 + idx, pmc_count); + rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl); + rdmsrl(pmc_ops->perfctr + idx, pmc_count); prev_left = per_cpu(prev_left[idx], cpu); @@ -655,29 +683,56 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { .priority = 1 }; -void __init init_hw_perf_counters(void) +static struct pmc_x86_ops pmc_intel_ops = { + .save_disable_all = pmc_intel_save_disable_all, + .restore_all = pmc_intel_restore_all, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = pmc_intel_event_map, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), +}; + +static struct pmc_x86_ops *pmc_intel_init(void) { union cpuid10_eax eax; unsigned int ebx; unsigned int unused; union cpuid10_edx edx; - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return; - /* * Check whether the Architectural PerfMon supports * Branch Misses Retired Event or not. */ cpuid(10, &eax.full, &ebx, &unused, &edx.full); if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) - return; + return NULL; printk(KERN_INFO "Intel Performance Monitoring support detected.\n"); - printk(KERN_INFO "... version: %d\n", eax.split.version_id); - printk(KERN_INFO "... num counters: %d\n", eax.split.num_counters); + printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); + printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); + nr_counters_generic = eax.split.num_counters; + nr_counters_fixed = edx.split.num_counters_fixed; + counter_value_mask = (1ULL << eax.split.bit_width) - 1; + + return &pmc_intel_ops; +} + +void __init init_hw_perf_counters(void) +{ + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + pmc_ops = pmc_intel_init(); + break; + } + if (!pmc_ops) + return; + + printk(KERN_INFO "... num counters: %d\n", nr_counters_generic); if (nr_counters_generic > X86_PMC_MAX_GENERIC) { nr_counters_generic = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", @@ -686,13 +741,8 @@ void __init init_hw_perf_counters(void) perf_counter_mask = (1 << nr_counters_generic) - 1; perf_max_counters = nr_counters_generic; - printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); - counter_value_mask = (1ULL << eax.split.bit_width) - 1; printk(KERN_INFO "... value mask: %016Lx\n", counter_value_mask); - printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); - - nr_counters_fixed = edx.split.num_counters_fixed; if (nr_counters_fixed > X86_PMC_MAX_FIXED) { nr_counters_fixed = X86_PMC_MAX_FIXED; WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", -- cgit v0.10.2 From f87ad35d37fa543925210550f7db20a54c83ed70 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 27 Feb 2009 20:15:14 +0530 Subject: x86: AMD Support for perf_counter Supported basic performance counter for AMD K7 and later: $ perfstat -e 0,1,2,3,4,5,-1,-2,-3,-4,-5 ls > /dev/null Performance counter stats for 'ls': 12.298610 task clock ticks (msecs) 3298477 CPU cycles (events) 1406354 instructions (events) 749035 cache references (events) 16939 cache misses (events) 100589 branches (events) 11159 branch misses (events) 7.627540 cpu clock ticks (msecs) 12.298610 task clock ticks (msecs) 500 pagefaults (events) 6 context switches (events) 3 CPU migrations (events) Wall-clock time elapsed: 8.672290 msecs Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 25423a5..edcde52 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -368,6 +368,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + /* Enable Performance counter for K7 and later */ + if (c->x86 > 6 && c->x86 <= 0x11) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + if (!c->x86_model_id[0]) { switch (c->x86) { case 0xf: diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a3c8852..266618a 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -74,6 +74,24 @@ static int pmc_intel_event_map(int event) } /* + * AMD Performance Monitor K7 and later. + */ +static const int amd_perfmon_event_map[] = +{ + [PERF_COUNT_CPU_CYCLES] = 0x0076, + [PERF_COUNT_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_CACHE_MISSES] = 0x0081, + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_BRANCH_MISSES] = 0x00c5, +}; + +static int pmc_amd_event_map(int event) +{ + return amd_perfmon_event_map[event]; +} + +/* * Propagate counter elapsed time into the generic counter. * Can only be executed on the CPU where the counter is active. * Returns the delta events processed. @@ -151,8 +169,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * so we install an artificial 1<<31 period regardless of * the generic counter period: */ - if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF) - hwc->irq_period = 0x7FFFFFFF; + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF) + hwc->irq_period = 0x7FFFFFFF; atomic64_set(&hwc->period_left, hwc->irq_period); @@ -184,6 +203,22 @@ static u64 pmc_intel_save_disable_all(void) return ctrl; } +static u64 pmc_amd_save_disable_all(void) +{ + int idx; + u64 val, ctrl = 0; + + for (idx = 0; idx < nr_counters_generic; idx++) { + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) + ctrl |= (1 << idx); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } + + return ctrl; +} + u64 hw_perf_save_disable(void) { if (unlikely(!perf_counters_initialized)) @@ -198,6 +233,20 @@ static void pmc_intel_restore_all(u64 ctrl) wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); } +static void pmc_amd_restore_all(u64 ctrl) +{ + u64 val; + int idx; + + for (idx = 0; idx < nr_counters_generic; idx++) { + if (ctrl & (1 << idx)) { + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } + } +} + void hw_perf_restore(u64 ctrl) { if (unlikely(!perf_counters_initialized)) @@ -314,6 +363,9 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { unsigned int event; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return -1; + if (unlikely(hwc->nmi)) return -1; @@ -401,6 +453,7 @@ void perf_counter_print_debug(void) cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); @@ -411,6 +464,7 @@ void perf_counter_print_debug(void) printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); printk(KERN_INFO "CPU#%d: fixed: %016llx\n", cpu, fixed); + } printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { @@ -588,6 +642,9 @@ void perf_counter_unthrottle(void) if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return; + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return; + if (unlikely(!perf_counters_initialized)) return; @@ -692,6 +749,15 @@ static struct pmc_x86_ops pmc_intel_ops = { .max_events = ARRAY_SIZE(intel_perfmon_event_map), }; +static struct pmc_x86_ops pmc_amd_ops = { + .save_disable_all = pmc_amd_save_disable_all, + .restore_all = pmc_amd_restore_all, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .event_map = pmc_amd_event_map, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), +}; + static struct pmc_x86_ops *pmc_intel_init(void) { union cpuid10_eax eax; @@ -719,6 +785,16 @@ static struct pmc_x86_ops *pmc_intel_init(void) return &pmc_intel_ops; } +static struct pmc_x86_ops *pmc_amd_init(void) +{ + nr_counters_generic = 4; + nr_counters_fixed = 0; + + printk(KERN_INFO "AMD Performance Monitoring support detected.\n"); + + return &pmc_amd_ops; +} + void __init init_hw_perf_counters(void) { if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) @@ -728,6 +804,9 @@ void __init init_hw_perf_counters(void) case X86_VENDOR_INTEL: pmc_ops = pmc_intel_init(); break; + case X86_VENDOR_AMD: + pmc_ops = pmc_amd_init(); + break; } if (!pmc_ops) return; -- cgit v0.10.2 From 169e41eb7f5464c077a7e0e129f025759d04cc54 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 28 Feb 2009 18:37:49 +0530 Subject: x86: decent declarations in perf_counter.c Impact: cleanup making decent declrations for struct pmc_x86_ops and fix checkpatch error: ERROR: Macros with complex values should be enclosed in parenthesis Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 266618a..a1f3646 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -42,12 +42,12 @@ struct cpu_hw_counters { * struct pmc_x86_ops - performance counter x86 ops */ struct pmc_x86_ops { - u64 (*save_disable_all) (void); - void (*restore_all) (u64 ctrl); - unsigned eventsel; - unsigned perfctr; - int (*event_map) (int event); - int max_events; + u64 (*save_disable_all)(void); + void (*restore_all)(u64 ctrl); + unsigned eventsel; + unsigned perfctr; + int (*event_map)(int event); + int max_events; }; static struct pmc_x86_ops *pmc_ops; @@ -561,7 +561,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) /* * Maximum interrupt frequency of 100KHz per CPU */ -#define PERFMON_MAX_INTERRUPTS 100000/HZ +#define PERFMON_MAX_INTERRUPTS (100000/HZ) /* * This handler is triggered by the local APIC, so the APIC IRQ handling -- cgit v0.10.2 From a1ef58f442542d8b3e3b963339fbc522c36e827c Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 28 Feb 2009 18:45:39 +0530 Subject: x86: use pr_info in perf_counter.c Impact: cleanup using pr_info in perf_counter.c fixes various 80 characters warnings and also indenting for conditional statement Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a1f3646..3b65f19 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -454,18 +454,18 @@ void perf_counter_print_debug(void) cpuc = &per_cpu(cpu_hw_counters, cpu); if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); - - printk(KERN_INFO "\n"); - printk(KERN_INFO "CPU#%d: ctrl: %016llx\n", cpu, ctrl); - printk(KERN_INFO "CPU#%d: status: %016llx\n", cpu, status); - printk(KERN_INFO "CPU#%d: overflow: %016llx\n", cpu, overflow); - printk(KERN_INFO "CPU#%d: fixed: %016llx\n", cpu, fixed); + rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); + rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + + pr_info("\n"); + pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); + pr_info("CPU#%d: status: %016llx\n", cpu, status); + pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); + pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); } - printk(KERN_INFO "CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); + pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl); @@ -473,17 +473,17 @@ void perf_counter_print_debug(void) prev_left = per_cpu(prev_left[idx], cpu); - printk(KERN_INFO "CPU#%d: gen-PMC%d ctrl: %016llx\n", + pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl); - printk(KERN_INFO "CPU#%d: gen-PMC%d count: %016llx\n", + pr_info("CPU#%d: gen-PMC%d count: %016llx\n", cpu, idx, pmc_count); - printk(KERN_INFO "CPU#%d: gen-PMC%d left: %016llx\n", + pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } for (idx = 0; idx < nr_counters_fixed; idx++) { rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); - printk(KERN_INFO "CPU#%d: fixed-PMC%d count: %016llx\n", + pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count); } local_irq_enable(); @@ -773,10 +773,10 @@ static struct pmc_x86_ops *pmc_intel_init(void) if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) return NULL; - printk(KERN_INFO "Intel Performance Monitoring support detected.\n"); - printk(KERN_INFO "... version: %d\n", eax.split.version_id); - printk(KERN_INFO "... bit width: %d\n", eax.split.bit_width); - printk(KERN_INFO "... mask length: %d\n", eax.split.mask_length); + pr_info("Intel Performance Monitoring support detected.\n"); + pr_info("... version: %d\n", eax.split.version_id); + pr_info("... bit width: %d\n", eax.split.bit_width); + pr_info("... mask length: %d\n", eax.split.mask_length); nr_counters_generic = eax.split.num_counters; nr_counters_fixed = edx.split.num_counters_fixed; @@ -790,7 +790,7 @@ static struct pmc_x86_ops *pmc_amd_init(void) nr_counters_generic = 4; nr_counters_fixed = 0; - printk(KERN_INFO "AMD Performance Monitoring support detected.\n"); + pr_info("AMD Performance Monitoring support detected.\n"); return &pmc_amd_ops; } @@ -811,7 +811,7 @@ void __init init_hw_perf_counters(void) if (!pmc_ops) return; - printk(KERN_INFO "... num counters: %d\n", nr_counters_generic); + pr_info("... num counters: %d\n", nr_counters_generic); if (nr_counters_generic > X86_PMC_MAX_GENERIC) { nr_counters_generic = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", @@ -820,18 +820,18 @@ void __init init_hw_perf_counters(void) perf_counter_mask = (1 << nr_counters_generic) - 1; perf_max_counters = nr_counters_generic; - printk(KERN_INFO "... value mask: %016Lx\n", counter_value_mask); + pr_info("... value mask: %016Lx\n", counter_value_mask); if (nr_counters_fixed > X86_PMC_MAX_FIXED) { nr_counters_fixed = X86_PMC_MAX_FIXED; WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", nr_counters_fixed, X86_PMC_MAX_FIXED); } - printk(KERN_INFO "... fixed counters: %d\n", nr_counters_fixed); + pr_info("... fixed counters: %d\n", nr_counters_fixed); perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED; - printk(KERN_INFO "... counter mask: %016Lx\n", perf_counter_mask); + pr_info("... counter mask: %016Lx\n", perf_counter_mask); perf_counters_initialized = true; perf_counters_lapic_init(0); -- cgit v0.10.2 From 2743a5b0fa6f309da904f2190a9cc25deee34dbd Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 4 Mar 2009 20:36:51 +1100 Subject: perfcounters: provide expansion room in the ABI Impact: ABI change This expands several fields in the perf_counter_hw_event struct and adds a "flags" argument to the perf_counter_open system call, in order that features can be added in future without ABI changes. In particular the record_type field is expanded to 64 bits, and the space for flag bits has been expanded from 32 to 64 bits. This also adds some new fields: * read_format (64 bits) is intended to provide a way to specify what userspace wants to get back when it does a read() on a simple (non-interrupting) counter; * exclude_idle (1 bit) provides a way for userspace to ask that events that occur when the cpu is idle be excluded; * extra_config_len will provide a way for userspace to supply an arbitrary amount of extra machine-specific PMU configuration data immediately following the perf_counter_hw_event struct, to allow sophisticated users to program things such as instruction matching CAMs and address range registers; * __reserved_3 and __reserved_4 provide space for future expansion. Signed-off-by: Paul Mackerras diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 186efaf..c42455a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -69,9 +69,10 @@ struct perf_counter_hw_event { __s64 type; __u64 irq_period; - __u32 record_type; + __u64 record_type; + __u64 read_format; - __u32 disabled : 1, /* off by default */ + __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ @@ -80,10 +81,15 @@ struct perf_counter_hw_event { exclude_user : 1, /* don't count user */ exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 23; + __reserved_1 : 55; + + __u32 extra_config_len; + __u32 __reserved_4; __u64 __reserved_2; + __u64 __reserved_3; }; /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 28ef2be..ab1d772 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -698,5 +698,5 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( const struct perf_counter_hw_event __user *hw_event_uptr, - pid_t pid, int cpu, int group_fd); + pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 16b14ba..b2e8389 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1683,16 +1683,16 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, } /** - * sys_perf_task_open - open a performance counter, associate it to a task/cpu + * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * * @hw_event_uptr: event type attributes for monitoring/sampling * @pid: target pid * @cpu: target cpu * @group_fd: group leader counter fd */ -SYSCALL_DEFINE4(perf_counter_open, +SYSCALL_DEFINE5(perf_counter_open, const struct perf_counter_hw_event __user *, hw_event_uptr, - pid_t, pid, int, cpu, int, group_fd) + pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { struct perf_counter *counter, *group_leader; struct perf_counter_hw_event hw_event; @@ -1703,6 +1703,10 @@ SYSCALL_DEFINE4(perf_counter_open, int fput_needed2 = 0; int ret; + /* for future expandability... */ + if (flags) + return -EINVAL; + if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) return -EFAULT; -- cgit v0.10.2 From 2485e5184452ccbda34ff83883883d9107800dff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 5 Mar 2009 12:33:16 +0100 Subject: perfcounters: fix reserved bits sizing The sum of bits is 65 currently not 64 - so reduce the # of reserved bits from 55 to 54. Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c42455a..dde5645 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -83,7 +83,7 @@ struct perf_counter_hw_event { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 55; + __reserved_1 : 54; __u32 extra_config_len; __u32 __reserved_4; -- cgit v0.10.2 From b0f3f28e0f14eb335f67bfaae33ce8b8d74fd58b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Mar 2009 18:08:27 +0100 Subject: perfcounters: IRQ and NMI support on AMD CPUs The below completes the K7+ performance counter support: - IRQ support - NMI support KernelTop output works now as well. Signed-off-by: Peter Zijlstra Cc: Jaswinder Singh Rajput Cc: Paul Mackerras LKML-Reference: <1236273633.5187.286.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3b65f19..6ebe9ab 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -28,6 +28,7 @@ static bool perf_counters_initialized __read_mostly; static int nr_counters_generic __read_mostly; static u64 perf_counter_mask __read_mostly; static u64 counter_value_mask __read_mostly; +static int counter_value_bits __read_mostly; static int nr_counters_fixed __read_mostly; @@ -35,7 +36,9 @@ struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; - u64 global_enable; + u64 throttle_ctrl; + u64 active_mask; + int enabled; }; /* @@ -43,21 +46,28 @@ struct cpu_hw_counters { */ struct pmc_x86_ops { u64 (*save_disable_all)(void); - void (*restore_all)(u64 ctrl); + void (*restore_all)(u64); + u64 (*get_status)(u64); + void (*ack_status)(u64); + void (*enable)(int, u64); + void (*disable)(int, u64); unsigned eventsel; unsigned perfctr; - int (*event_map)(int event); + u64 (*event_map)(int); + u64 (*raw_event)(u64); int max_events; }; static struct pmc_x86_ops *pmc_ops; -static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); +static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { + .enabled = 1, +}; /* * Intel PerfMon v3. Used on Core2 and later. */ -static const int intel_perfmon_event_map[] = +static const u64 intel_perfmon_event_map[] = { [PERF_COUNT_CPU_CYCLES] = 0x003c, [PERF_COUNT_INSTRUCTIONS] = 0x00c0, @@ -68,15 +78,29 @@ static const int intel_perfmon_event_map[] = [PERF_COUNT_BUS_CYCLES] = 0x013c, }; -static int pmc_intel_event_map(int event) +static u64 pmc_intel_event_map(int event) { return intel_perfmon_event_map[event]; } +static u64 pmc_intel_raw_event(u64 event) +{ +#define CORE_EVNTSEL_EVENT_MASK 0x000000FF +#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00 +#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000 + +#define CORE_EVNTSEL_MASK \ + (CORE_EVNTSEL_EVENT_MASK | \ + CORE_EVNTSEL_UNIT_MASK | \ + CORE_EVNTSEL_COUNTER_MASK) + + return event & CORE_EVNTSEL_MASK; +} + /* * AMD Performance Monitor K7 and later. */ -static const int amd_perfmon_event_map[] = +static const u64 amd_perfmon_event_map[] = { [PERF_COUNT_CPU_CYCLES] = 0x0076, [PERF_COUNT_INSTRUCTIONS] = 0x00c0, @@ -86,11 +110,25 @@ static const int amd_perfmon_event_map[] = [PERF_COUNT_BRANCH_MISSES] = 0x00c5, }; -static int pmc_amd_event_map(int event) +static u64 pmc_amd_event_map(int event) { return amd_perfmon_event_map[event]; } +static u64 pmc_amd_raw_event(u64 event) +{ +#define K7_EVNTSEL_EVENT_MASK 0x7000000FF +#define K7_EVNTSEL_UNIT_MASK 0x00000FF00 +#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000 + +#define K7_EVNTSEL_MASK \ + (K7_EVNTSEL_EVENT_MASK | \ + K7_EVNTSEL_UNIT_MASK | \ + K7_EVNTSEL_COUNTER_MASK) + + return event & K7_EVNTSEL_MASK; +} + /* * Propagate counter elapsed time into the generic counter. * Can only be executed on the CPU where the counter is active. @@ -179,7 +217,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * Raw event type provide the config in the event structure */ if (hw_event->raw) { - hwc->config |= hw_event->type; + hwc->config |= pmc_ops->raw_event(hw_event->type); } else { if (hw_event->type >= pmc_ops->max_events) return -EINVAL; @@ -205,18 +243,24 @@ static u64 pmc_intel_save_disable_all(void) static u64 pmc_amd_save_disable_all(void) { - int idx; - u64 val, ctrl = 0; + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + int enabled, idx; + + enabled = cpuc->enabled; + cpuc->enabled = 0; + barrier(); for (idx = 0; idx < nr_counters_generic; idx++) { + u64 val; + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) - ctrl |= (1 << idx); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) { + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + } } - return ctrl; + return enabled; } u64 hw_perf_save_disable(void) @@ -226,6 +270,9 @@ u64 hw_perf_save_disable(void) return pmc_ops->save_disable_all(); } +/* + * Exported because of ACPI idle + */ EXPORT_SYMBOL_GPL(hw_perf_save_disable); static void pmc_intel_restore_all(u64 ctrl) @@ -235,11 +282,18 @@ static void pmc_intel_restore_all(u64 ctrl) static void pmc_amd_restore_all(u64 ctrl) { - u64 val; + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); int idx; + cpuc->enabled = ctrl; + barrier(); + if (!ctrl) + return; + for (idx = 0; idx < nr_counters_generic; idx++) { - if (ctrl & (1 << idx)) { + if (test_bit(idx, (unsigned long *)&cpuc->active_mask)) { + u64 val; + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(MSR_K7_EVNTSEL0 + idx, val); @@ -254,8 +308,112 @@ void hw_perf_restore(u64 ctrl) pmc_ops->restore_all(ctrl); } +/* + * Exported because of ACPI idle + */ EXPORT_SYMBOL_GPL(hw_perf_restore); +static u64 pmc_intel_get_status(u64 mask) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static u64 pmc_amd_get_status(u64 mask) +{ + u64 status = 0; + int idx; + + for (idx = 0; idx < nr_counters_generic; idx++) { + s64 val; + + if (!(mask & (1 << idx))) + continue; + + rdmsrl(MSR_K7_PERFCTR0 + idx, val); + val <<= (64 - counter_value_bits); + if (val >= 0) + status |= (1 << idx); + } + + return status; +} + +static u64 hw_perf_get_status(u64 mask) +{ + if (unlikely(!perf_counters_initialized)) + return 0; + + return pmc_ops->get_status(mask); +} + +static void pmc_intel_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static void pmc_amd_ack_status(u64 ack) +{ +} + +static void hw_perf_ack_status(u64 ack) +{ + if (unlikely(!perf_counters_initialized)) + return; + + pmc_ops->ack_status(ack); +} + +static void pmc_intel_enable(int idx, u64 config) +{ + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, + config | ARCH_PERFMON_EVENTSEL0_ENABLE); +} + +static void pmc_amd_enable(int idx, u64 config) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + set_bit(idx, (unsigned long *)&cpuc->active_mask); + if (cpuc->enabled) + config |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + wrmsrl(MSR_K7_EVNTSEL0 + idx, config); +} + +static void hw_perf_enable(int idx, u64 config) +{ + if (unlikely(!perf_counters_initialized)) + return; + + pmc_ops->enable(idx, config); +} + +static void pmc_intel_disable(int idx, u64 config) +{ + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config); +} + +static void pmc_amd_disable(int idx, u64 config) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + clear_bit(idx, (unsigned long *)&cpuc->active_mask); + wrmsrl(MSR_K7_EVNTSEL0 + idx, config); + +} + +static void hw_perf_disable(int idx, u64 config) +{ + if (unlikely(!perf_counters_initialized)) + return; + + pmc_ops->disable(idx, config); +} + static inline void __pmc_fixed_disable(struct perf_counter *counter, struct hw_perf_counter *hwc, unsigned int __idx) @@ -278,7 +436,7 @@ __pmc_generic_disable(struct perf_counter *counter, if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_disable(counter, hwc, idx); else - wrmsr_safe(hwc->config_base + idx, hwc->config, 0); + hw_perf_disable(idx, hwc->config); } static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); @@ -354,8 +512,7 @@ __pmc_generic_enable(struct perf_counter *counter, if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_enable(counter, hwc, idx); else - wrmsr(hwc->config_base + idx, - hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE, 0); + hw_perf_enable(idx, hwc->config); } static int @@ -567,22 +724,20 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: */ -static void __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) +static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) { int bit, cpu = smp_processor_id(); u64 ack, status; struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); + int ret = 0; - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); - - /* Disable counters globally */ - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - ack_APIC_irq(); + cpuc->throttle_ctrl = hw_perf_save_disable(); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + status = hw_perf_get_status(cpuc->throttle_ctrl); if (!status) goto out; + ret = 1; again: inc_irq_stat(apic_perf_irqs); ack = status; @@ -618,12 +773,12 @@ again: } } - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); + hw_perf_ack_status(ack); /* * Repeat if there is more work to be done: */ - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + status = hw_perf_get_status(cpuc->throttle_ctrl); if (status) goto again; out: @@ -631,32 +786,27 @@ out: * Restore - do not reenable when global enable is off or throttled: */ if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); + hw_perf_restore(cpuc->throttle_ctrl); + + return ret; } void perf_counter_unthrottle(void) { struct cpu_hw_counters *cpuc; - u64 global_enable; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return; - if (unlikely(!perf_counters_initialized)) return; - cpuc = &per_cpu(cpu_hw_counters, smp_processor_id()); + cpuc = &__get_cpu_var(cpu_hw_counters); if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { if (printk_ratelimit()) printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n"); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); + hw_perf_restore(cpuc->throttle_ctrl); } - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_enable); - if (unlikely(cpuc->global_enable && !global_enable)) - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, cpuc->global_enable); cpuc->interrupts = 0; } @@ -664,8 +814,8 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) { irq_enter(); apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); + ack_APIC_irq(); __smp_perf_counter_interrupt(regs, 0); - irq_exit(); } @@ -722,16 +872,23 @@ perf_counter_nmi_handler(struct notifier_block *self, { struct die_args *args = __args; struct pt_regs *regs; + int ret; + + switch (cmd) { + case DIE_NMI: + case DIE_NMI_IPI: + break; - if (likely(cmd != DIE_NMI_IPI)) + default: return NOTIFY_DONE; + } regs = args->regs; apic_write(APIC_LVTPC, APIC_DM_NMI); - __smp_perf_counter_interrupt(regs, 1); + ret = __smp_perf_counter_interrupt(regs, 1); - return NOTIFY_STOP; + return ret ? NOTIFY_STOP : NOTIFY_OK; } static __read_mostly struct notifier_block perf_counter_nmi_notifier = { @@ -743,18 +900,28 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { static struct pmc_x86_ops pmc_intel_ops = { .save_disable_all = pmc_intel_save_disable_all, .restore_all = pmc_intel_restore_all, + .get_status = pmc_intel_get_status, + .ack_status = pmc_intel_ack_status, + .enable = pmc_intel_enable, + .disable = pmc_intel_disable, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = pmc_intel_event_map, + .raw_event = pmc_intel_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), }; static struct pmc_x86_ops pmc_amd_ops = { .save_disable_all = pmc_amd_save_disable_all, .restore_all = pmc_amd_restore_all, + .get_status = pmc_amd_get_status, + .ack_status = pmc_amd_ack_status, + .enable = pmc_amd_enable, + .disable = pmc_amd_disable, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .event_map = pmc_amd_event_map, + .raw_event = pmc_amd_raw_event, .max_events = ARRAY_SIZE(amd_perfmon_event_map), }; @@ -787,8 +954,25 @@ static struct pmc_x86_ops *pmc_intel_init(void) static struct pmc_x86_ops *pmc_amd_init(void) { + u64 old; + int bits; + nr_counters_generic = 4; nr_counters_fixed = 0; + counter_value_mask = ~0ULL; + + rdmsrl(MSR_K7_PERFCTR0, old); + wrmsrl(MSR_K7_PERFCTR0, counter_value_mask); + /* + * read the truncated mask + */ + rdmsrl(MSR_K7_PERFCTR0, counter_value_mask); + wrmsrl(MSR_K7_PERFCTR0, old); + + bits = 32 + fls(counter_value_mask >> 32); + if (bits == 32) + bits = fls((u32)counter_value_mask); + counter_value_bits = bits; pr_info("AMD Performance Monitoring support detected.\n"); -- cgit v0.10.2 From b5e8acf66ff5db707c7e08df49fdf6b415878442 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 5 Mar 2009 20:34:21 +0100 Subject: perfcounters: IRQ and NMI support on AMD CPUs, fix The BKGD suggests that counter width on AMD CPUs is 48 for all existing models (it certainly is for mine). Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6ebe9ab..f585371 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -959,20 +959,8 @@ static struct pmc_x86_ops *pmc_amd_init(void) nr_counters_generic = 4; nr_counters_fixed = 0; - counter_value_mask = ~0ULL; - - rdmsrl(MSR_K7_PERFCTR0, old); - wrmsrl(MSR_K7_PERFCTR0, counter_value_mask); - /* - * read the truncated mask - */ - rdmsrl(MSR_K7_PERFCTR0, counter_value_mask); - wrmsrl(MSR_K7_PERFCTR0, old); - - bits = 32 + fls(counter_value_mask >> 32); - if (bits == 32) - bits = fls((u32)counter_value_mask); - counter_value_bits = bits; + counter_value_mask = 0x0000FFFFFFFFFFFFULL; + counter_value_bits = 48; pr_info("AMD Performance Monitoring support detected.\n"); -- cgit v0.10.2 From 86028598de16538f02519141756ccf4accfc29a6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 5 Mar 2009 14:05:57 +1100 Subject: perfcounters/powerpc: fix oops with multiple counters in a group Impact: fix oops-causing bug This fixes a bug in the powerpc hw_perf_counter_init where the code didn't initialize ctrs[n] before passing the ctrs array to check_excludes, leading to possible oopses and other incorrect behaviour. This fixes it by initializing ctrs[n] correctly. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 112332d..4fec112 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -633,6 +633,7 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; } events[n] = ev; + ctrs[n] = counter; if (check_excludes(ctrs, n, 1)) return NULL; if (power_check_constraints(events, n + 1)) -- cgit v0.10.2 From aabbaa6036fd847c583f585c6bae82b5a033e6c7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 6 Mar 2009 16:27:10 +1100 Subject: perfcounters/powerpc: add support for POWER5+ processors Impact: more hardware support This adds the back-end for the PMU on the POWER5+ processors (i.e. GS, including GS DD3 aka POWER5++). This doesn't use the fixed-function PMC5 and PMC6 since they don't respect the freeze conditions and don't generate interrupts, as on POWER6. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b4c6f46..49851e0 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,8 +94,8 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o power5-pmu.o \ - power6-pmu.o +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o \ + power5-pmu.o power5+-pmu.o power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 4fec112..162f398 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -826,6 +826,7 @@ void hw_perf_counter_setup(int cpu) extern struct power_pmu ppc970_pmu; extern struct power_pmu power5_pmu; +extern struct power_pmu power5p_pmu; extern struct power_pmu power6_pmu; static int init_perf_counters(void) @@ -848,6 +849,9 @@ static int init_perf_counters(void) case PV_POWER5: ppmu = &power5_pmu; break; + case PV_POWER5p: + ppmu = &power5p_pmu; + break; case 0x3e: ppmu = &power6_pmu; break; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c new file mode 100644 index 0000000..cec21ea --- /dev/null +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -0,0 +1,452 @@ +/* + * Performance counter support for POWER5 (not POWER5++) processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) + */ +#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) +#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_BYTE_SH 12 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 7 +#define PM_GRS_SH 8 /* Storage subsystem mux select */ +#define PM_GRS_MSK 7 +#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ +#define PM_PMCSEL_MSK 0x7f + +/* Values in PM_UNIT field */ +#define PM_FPU 0 +#define PM_ISU0 1 +#define PM_IFU 2 +#define PM_ISU1 3 +#define PM_IDU 4 +#define PM_ISU0_ALT 6 +#define PM_GRS 7 +#define PM_LSU0 8 +#define PM_LSU1 0xc +#define PM_LASTUNIT 0xc + +/* + * Bits in MMCR1 for POWER5+ + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTM1SEL_SH 60 +#define MMCR1_TTM2SEL_SH 58 +#define MMCR1_TTM3SEL_SH 56 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 54 +#define MMCR1_TD_CP_DBG1SEL_SH 52 +#define MMCR1_TD_CP_DBG2SEL_SH 50 +#define MMCR1_TD_CP_DBG3SEL_SH 48 +#define MMCR1_GRS_L2SEL_SH 46 +#define MMCR1_GRS_L2SEL_MSK 3 +#define MMCR1_GRS_L3SEL_SH 44 +#define MMCR1_GRS_L3SEL_MSK 3 +#define MMCR1_GRS_MCSEL_SH 41 +#define MMCR1_GRS_MCSEL_MSK 7 +#define MMCR1_GRS_FABSEL_SH 39 +#define MMCR1_GRS_FABSEL_MSK 3 +#define MMCR1_PMC1_ADDER_SEL_SH 35 +#define MMCR1_PMC2_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC1SEL_SH 25 +#define MMCR1_PMC2SEL_SH 17 +#define MMCR1_PMC3SEL_SH 9 +#define MMCR1_PMC4SEL_SH 1 +#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) +#define MMCR1_PMCSEL_MSK 0x7f + +/* + * Bits in MMCRA + */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> + * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 + * + * NC - number of counters + * 51: NC error 0x0008_0000_0000_0000 + * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 + * + * G0..G3 - GRS mux constraints + * 46-47: GRS_L2SEL value + * 44-45: GRS_L3SEL value + * 41-44: GRS_MCSEL value + * 39-40: GRS_FABSEL value + * Note that these match up with their bit positions in MMCR1 + * + * T0 - TTM0 constraint + * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 + * + * T1 - TTM1 constraint + * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 + * + * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS + * 33: UC3 error 0x02_0000_0000 + * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 + * 31: ISU0 events needed 0x01_8000_0000 + * 30: IDU|GRS events needed 0x00_4000_0000 + * + * B0 + * 20-23: Byte 0 event source 0x00f0_0000 + * Encoding as for the event code + * + * B1, B2, B3 + * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources + * + * P4 + * 7: P1 error 0x80 + * 6-7: Count of events needing PMC4 + * + * P1..P3 + * 0-6: Count of events needing PMC1..PMC3 + */ + +static const int grsel_shift[8] = { + MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, + MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, + MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH +}; + +/* Masks and values for using events from the various units */ +static u64 unit_cons[PM_LASTUNIT+1][2] = { + [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, + [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, + [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, + [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, + [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, + [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, +}; + +static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, sh; + int bit, fmask; + u64 mask = 0, value = 0; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 4) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + } + if (event & PM_BUSEVENT_MSK) { + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + mask |= unit_cons[unit][0]; + value |= unit_cons[unit][1]; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ + ++unit; + byte &= 3; + } + if (unit == PM_GRS) { + bit = event & 7; + fmask = (bit == 6)? 7: 3; + sh = grsel_shift[bit]; + mask |= (u64)fmask << sh; + value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; + } + /* Set byte lane select field */ + mask |= 0xfULL << (20 - 4 * byte); + value |= (u64)unit << (20 - 4 * byte); + } + mask |= 0x8000000000000ull; + value |= 0x1000000000000ull; + *maskp = mask; + *valp = value; + return 0; +} + +#define MAX_ALT 3 /* at most 3 alternatives for any event */ + +static const unsigned int event_alternatives[][MAX_ALT] = { + { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ + { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ + { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ + { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ + { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ + { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ + { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ + { 0x100009, 0x200009 }, /* PM_INST_CMPL */ + { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ + { 0x300009, 0x400009 }, /* PM_INST_DISP */ +}; + +/* + * Scan the alternatives table for a match and return the + * index into the alternatives table if found, else -1. + */ +static int find_alternative(unsigned int event) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { + if (event < event_alternatives[i][0]) + break; + for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) + if (event == event_alternatives[i][j]) + return i; + } + return -1; +} + +static const unsigned char bytedecode_alternatives[4][4] = { + /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, + /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, + /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, + /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } +}; + +/* + * Some direct events for decodes of event bus byte 3 have alternative + * PMCSEL values on other counters. This returns the alternative + * event code for those that do, or -1 otherwise. This also handles + * alternative PCMSEL values for add events. + */ +static int find_alternative_bdecode(unsigned int event) +{ + int pmc, altpmc, pp, j; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc == 0 || pmc > 4) + return -1; + altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ + pp = event & PM_PMCSEL_MSK; + for (j = 0; j < 4; ++j) { + if (bytedecode_alternatives[pmc - 1][j] == pp) { + return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | + (altpmc << PM_PMC_SH) | + bytedecode_alternatives[altpmc - 1][j]; + } + } + + /* new decode alternatives for power5+ */ + if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) + return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); + if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) + return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); + + /* alternative add event encodings */ + if (pp == 0x10 || pp == 0x28) + return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | + (altpmc << PM_PMC_SH); + + return -1; +} + +static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j, ae, nalt = 1; + + alt[0] = event; + nalt = 1; + i = find_alternative(event); + if (i >= 0) { + for (j = 0; j < MAX_ALT; ++j) { + ae = event_alternatives[i][j]; + if (ae && ae != event) + alt[nalt++] = ae; + } + } else { + ae = find_alternative_bdecode(event); + if (ae > 0) + alt[nalt++] = ae; + } + return nalt; +} + +static int power5p_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr1 = 0; + unsigned int pmc, unit, byte, psel; + unsigned int ttm; + int i, isbus, bit, grsel; + unsigned int pmc_inuse = 0; + unsigned char busbyte[4]; + unsigned char unituse[16]; + int ttmuse; + + if (n_ev > 4) + return -1; + + /* First pass to count resource use */ + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 4) + return -1; + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + } + if (event[i] & PM_BUSEVENT_MSK) { + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit > PM_LASTUNIT) + return -1; + if (unit == PM_ISU0_ALT) + unit = PM_ISU0; + if (byte >= 4) { + if (unit != PM_LSU1) + return -1; + ++unit; + byte &= 3; + } + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + unituse[unit] = 1; + } + } + + /* + * Assign resources and set multiplexer selects. + * + * PM_ISU0 can go either on TTM0 or TTM1, but that's the only + * choice we have to deal with. + */ + if (unituse[PM_ISU0] & + (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { + unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ + unituse[PM_ISU0] = 0; + } + /* Set TTM[01]SEL fields. */ + ttmuse = 0; + for (i = PM_FPU; i <= PM_ISU1; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; + } + ttmuse = 0; + for (; i <= PM_GRS; ++i) { + if (!unituse[i]) + continue; + if (ttmuse++) + return -1; + mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; + } + if (ttmuse > 1) + return -1; + + /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { + /* get ISU0 through TTM1 rather than TTM0 */ + unit = PM_ISU0_ALT; + } else if (unit == PM_LSU1 + 1) { + /* select lower word of LSU1 for this byte */ + mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); + } + ttm = unit >> 2; + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + isbus = event[i] & PM_BUSEVENT_MSK; + if (!pmc) { + /* Bus event or any-PMC direct event */ + for (pmc = 0; pmc < 4; ++pmc) { + if (!(pmc_inuse & (1 << pmc))) + break; + } + if (pmc >= 4) + return -1; + pmc_inuse |= 1 << pmc; + } else { + /* Direct event */ + --pmc; + if (isbus && (byte & 2) && + (psel == 8 || psel == 0x10 || psel == 0x28)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + } + if (isbus && unit == PM_GRS) { + bit = psel & 7; + grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; + mmcr1 |= (u64)grsel << grsel_shift[bit]; + } + if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) + /* select alternate byte lane */ + psel |= 0x10; + if (pmc <= 3) + mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); + hwc[i] = pmc; + } + + /* Return MMCRx values */ + mmcr[0] = 0; + if (pmc_inuse & 1) + mmcr[0] = MMCR0_PMC1CE; + if (pmc_inuse & 0x3e) + mmcr[0] |= MMCR0_PMCjCE; + mmcr[1] = mmcr1; + mmcr[2] = 0; + return 0; +} + +static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + if (pmc <= 3) + mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); +} + +static int power5p_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 0xf, + [PERF_COUNT_INSTRUCTIONS] = 0x100009, + [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ +}; + +struct power_pmu power5p_pmu = { + .n_counter = 4, + .max_alternatives = MAX_ALT, + .add_fields = 0x7000000000055ull, + .test_adder = 0x3000040000000ull, + .compute_mmcr = power5p_compute_mmcr, + .get_constraint = power5p_get_constraint, + .get_alternatives = power5p_get_alternatives, + .disable_pmc = power5p_disable_pmc, + .n_generic = ARRAY_SIZE(power5p_generic_events), + .generic_events = power5p_generic_events, +}; -- cgit v0.10.2 From 880860e392d92c457e8116cdee39ec4d109174ee Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 6 Mar 2009 16:30:52 +1100 Subject: perfcounters/powerpc: add support for POWER4 processors Impact: more hardware support This adds the back-end for the PMU on the POWER4 and POWER4+ processors (GP and GQ). This is quite similar to the PPC970, with 8 PMCs, but has fewer events than the PPC970. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 49851e0..8e5e2c7 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ppc970-pmu.o \ +obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ power5-pmu.o power5+-pmu.o power6-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 162f398..0e33d27 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -824,6 +824,7 @@ void hw_perf_counter_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } +extern struct power_pmu power4_pmu; extern struct power_pmu ppc970_pmu; extern struct power_pmu power5_pmu; extern struct power_pmu power5p_pmu; @@ -841,6 +842,10 @@ static int init_perf_counters(void) /* XXX should get this from cputable */ pvr = mfspr(SPRN_PVR); switch (PVR_VER(pvr)) { + case PV_POWER4: + case PV_POWER4p: + ppmu = &power4_pmu; + break; case PV_970: case PV_970FX: case PV_970MP: diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c new file mode 100644 index 0000000..1407b19 --- /dev/null +++ b/arch/powerpc/kernel/power4-pmu.c @@ -0,0 +1,557 @@ +/* + * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. + * + * Copyright 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +/* + * Bits in event code for POWER4 + */ +#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ +#define PM_PMC_MSK 0xf +#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ +#define PM_UNIT_MSK 0xf +#define PM_LOWER_SH 6 +#define PM_LOWER_MSK 1 +#define PM_LOWER_MSKS 0x40 +#define PM_BYTE_SH 4 /* Byte number of event bus to use */ +#define PM_BYTE_MSK 3 +#define PM_PMCSEL_MSK 7 + +/* + * Unit code values + */ +#define PM_FPU 1 +#define PM_ISU1 2 +#define PM_IFU 3 +#define PM_IDU0 4 +#define PM_ISU1_ALT 6 +#define PM_ISU2 7 +#define PM_IFU_ALT 8 +#define PM_LSU0 9 +#define PM_LSU1 0xc +#define PM_GPS 0xf + +/* + * Bits in MMCR0 for POWER4 + */ +#define MMCR0_PMC1SEL_SH 8 +#define MMCR0_PMC2SEL_SH 1 +#define MMCR_PMCSEL_MSK 0x1f + +/* + * Bits in MMCR1 for POWER4 + */ +#define MMCR1_TTM0SEL_SH 62 +#define MMCR1_TTC0SEL_SH 61 +#define MMCR1_TTM1SEL_SH 59 +#define MMCR1_TTC1SEL_SH 58 +#define MMCR1_TTM2SEL_SH 56 +#define MMCR1_TTC2SEL_SH 55 +#define MMCR1_TTM3SEL_SH 53 +#define MMCR1_TTC3SEL_SH 52 +#define MMCR1_TTMSEL_MSK 3 +#define MMCR1_TD_CP_DBG0SEL_SH 50 +#define MMCR1_TD_CP_DBG1SEL_SH 48 +#define MMCR1_TD_CP_DBG2SEL_SH 46 +#define MMCR1_TD_CP_DBG3SEL_SH 44 +#define MMCR1_DEBUG0SEL_SH 43 +#define MMCR1_DEBUG1SEL_SH 42 +#define MMCR1_DEBUG2SEL_SH 41 +#define MMCR1_DEBUG3SEL_SH 40 +#define MMCR1_PMC1_ADDER_SEL_SH 39 +#define MMCR1_PMC2_ADDER_SEL_SH 38 +#define MMCR1_PMC6_ADDER_SEL_SH 37 +#define MMCR1_PMC5_ADDER_SEL_SH 36 +#define MMCR1_PMC8_ADDER_SEL_SH 35 +#define MMCR1_PMC7_ADDER_SEL_SH 34 +#define MMCR1_PMC3_ADDER_SEL_SH 33 +#define MMCR1_PMC4_ADDER_SEL_SH 32 +#define MMCR1_PMC3SEL_SH 27 +#define MMCR1_PMC4SEL_SH 22 +#define MMCR1_PMC5SEL_SH 17 +#define MMCR1_PMC6SEL_SH 12 +#define MMCR1_PMC7SEL_SH 7 +#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ + +static short mmcr1_adder_bits[8] = { + MMCR1_PMC1_ADDER_SEL_SH, + MMCR1_PMC2_ADDER_SEL_SH, + MMCR1_PMC3_ADDER_SEL_SH, + MMCR1_PMC4_ADDER_SEL_SH, + MMCR1_PMC5_ADDER_SEL_SH, + MMCR1_PMC6_ADDER_SEL_SH, + MMCR1_PMC7_ADDER_SEL_SH, + MMCR1_PMC8_ADDER_SEL_SH +}; + +/* + * Bits in MMCRA + */ +#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ + +/* + * Layout of constraint bits: + * 6666555555555544444444443333333333222222222211111111110000000000 + * 3210987654321098765432109876543210987654321098765432109876543210 + * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> + * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * \SMPL ||\TTC3SEL + * |\TTC_IFU_SEL + * \TTM2SEL0 + * + * SMPL - SAMPLE_ENABLE constraint + * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 + * + * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 + * 55: UC1 error 0x0080_0000_0000_0000 + * 54: FPU events needed 0x0040_0000_0000_0000 + * 53: ISU1 events needed 0x0020_0000_0000_0000 + * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 + * + * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 + * 51: UC2 error 0x0008_0000_0000_0000 + * 50: FPU events needed 0x0004_0000_0000_0000 + * 49: IFU events needed 0x0002_0000_0000_0000 + * 48: LSU0 events needed 0x0001_0000_0000_0000 + * + * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 + * 47: UC3 error 0x8000_0000_0000 + * 46: LSU0 events needed 0x4000_0000_0000 + * 45: IFU events needed 0x2000_0000_0000 + * 44: IDU0|ISU2 events needed 0x1000_0000_0000 + * 43: ISU1 events needed 0x0800_0000_0000 + * + * TTM2SEL0 + * 42: 0 = IDU0 events needed + * 1 = ISU2 events needed 0x0400_0000_0000 + * + * TTC_IFU_SEL + * 41: 0 = IFU.U events needed + * 1 = IFU.L events needed 0x0200_0000_0000 + * + * TTC3SEL + * 40: 0 = LSU1.U events needed + * 1 = LSU1.L events needed 0x0100_0000_0000 + * + * PS1 + * 39: PS1 error 0x0080_0000_0000 + * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 + * + * PS2 + * 35: PS2 error 0x0008_0000_0000 + * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 + * + * B0 + * 28-31: Byte 0 event source 0xf000_0000 + * 1 = FPU + * 2 = ISU1 + * 3 = IFU + * 4 = IDU0 + * 7 = ISU2 + * 9 = LSU0 + * c = LSU1 + * f = GPS + * + * B1, B2, B3 + * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources + * + * P8 + * 15: P8 error 0x8000 + * 14-15: Count of events needing PMC8 + * + * P1..P7 + * 0-13: Count of events needing PMC1..PMC7 + * + * Note: this doesn't allow events using IFU.U to be combined with events + * using IFU.L, though that is feasible (using TTM0 and TTM2). However + * there are no listed events for IFU.L (they are debug events not + * verified for performance monitoring) so this shouldn't cause a + * problem. + */ + +static struct unitinfo { + u64 value, mask; + int unit; + int lowerbit; +} p4_unitinfo[16] = { + [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, + [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, + [PM_ISU1_ALT] = + { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, + [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, + [PM_IFU_ALT] = + { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, + [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, + [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, + [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, + [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, + [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } +}; + +static unsigned char direct_marked_event[8] = { + (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ + (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ + (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ + (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ + (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ + (1<<3) | (1<<4) | (1<<5), + /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ + (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ + (1<<4), /* PMC8: PM_MRK_LSU_FIN */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int p4_marked_instr_event(unsigned int event) +{ + int pmc, psel, unit, byte, bit; + unsigned int mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc) { + if (direct_marked_event[pmc - 1] & (1 << psel)) + return 1; + if (psel == 0) /* add events */ + bit = (pmc <= 4)? pmc - 1: 8 - pmc; + else if (psel == 6) /* decode events */ + bit = 4; + else + return 0; + } else + bit = psel; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + mask = 0; + switch (unit) { + case PM_LSU1: + if (event & PM_LOWER_MSKS) + mask = 1 << 28; /* byte 7 bit 4 */ + else + mask = 6 << 24; /* byte 3 bits 1 and 2 */ + break; + case PM_LSU0: + /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ + mask = 0x083dff00; + } + return (mask >> (byte * 8 + bit)) & 1; +} + +static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp) +{ + int pmc, byte, unit, lower, sh; + u64 mask = 0, value = 0; + int grp = -1; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc > 8) + return -1; + sh = (pmc - 1) * 2; + mask |= 2 << sh; + value |= 1 << sh; + grp = ((pmc - 1) >> 1) & 1; + } + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + if (unit) { + lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; + + /* + * Bus events on bytes 0 and 2 can be counted + * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. + */ + if (!pmc) + grp = byte & 1; + + if (!p4_unitinfo[unit].unit) + return -1; + mask |= p4_unitinfo[unit].mask; + value |= p4_unitinfo[unit].value; + sh = p4_unitinfo[unit].lowerbit; + if (sh > 1) + value |= (u64)lower << sh; + else if (lower != sh) + return -1; + unit = p4_unitinfo[unit].unit; + + /* Set byte lane select field */ + mask |= 0xfULL << (28 - 4 * byte); + value |= (u64)unit << (28 - 4 * byte); + } + if (grp == 0) { + /* increment PMC1/2/5/6 field */ + mask |= 0x8000000000ull; + value |= 0x1000000000ull; + } else { + /* increment PMC3/4/7/8 field */ + mask |= 0x800000000ull; + value |= 0x100000000ull; + } + + /* Marked instruction events need sample_enable set */ + if (p4_marked_instr_event(event)) { + mask |= 1ull << 56; + value |= 1ull << 56; + } + + /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ + if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) + mask |= 1ull << 56; + + *maskp = mask; + *valp = value; + return 0; +} + +static unsigned int ppc_inst_cmpl[] = { + 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 +}; + +static int p4_get_alternatives(unsigned int event, unsigned int alt[]) +{ + int i, j, na; + + alt[0] = event; + na = 1; + + /* 2 possibilities for PM_GRP_DISP_REJECT */ + if (event == 0x8003 || event == 0x0224) { + alt[1] = event ^ (0x8003 ^ 0x0224); + return 2; + } + + /* 2 possibilities for PM_ST_MISS_L1 */ + if (event == 0x0c13 || event == 0x0c23) { + alt[1] = event ^ (0x0c13 ^ 0x0c23); + return 2; + } + + /* several possibilities for PM_INST_CMPL */ + for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { + if (event == ppc_inst_cmpl[i]) { + for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) + if (j != i) + alt[na++] = ppc_inst_cmpl[j]; + break; + } + } + + return na; +} + +static int p4_compute_mmcr(unsigned int event[], int n_ev, + unsigned int hwc[], u64 mmcr[]) +{ + u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; + unsigned int pmc, unit, byte, psel, lower; + unsigned int ttm, grp; + unsigned int pmc_inuse = 0; + unsigned int pmc_grp_use[2]; + unsigned char busbyte[4]; + unsigned char unituse[16]; + unsigned int unitlower = 0; + int i; + + if (n_ev > 8) + return -1; + + /* First pass to count resource use */ + pmc_grp_use[0] = pmc_grp_use[1] = 0; + memset(busbyte, 0, sizeof(busbyte)); + memset(unituse, 0, sizeof(unituse)); + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + if (pmc) { + if (pmc_inuse & (1 << (pmc - 1))) + return -1; + pmc_inuse |= 1 << (pmc - 1); + /* count 1/2/5/6 vs 3/4/7/8 use */ + ++pmc_grp_use[((pmc - 1) >> 1) & 1]; + } + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; + if (unit) { + if (!pmc) + ++pmc_grp_use[byte & 1]; + if (unit == 6 || unit == 8) + /* map alt ISU1/IFU codes: 6->2, 8->3 */ + unit = (unit >> 1) - 1; + if (busbyte[byte] && busbyte[byte] != unit) + return -1; + busbyte[byte] = unit; + lower <<= unit; + if (unituse[unit] && lower != (unitlower & lower)) + return -1; + unituse[unit] = 1; + unitlower |= lower; + } + } + if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) + return -1; + + /* + * Assign resources and set multiplexer selects. + * + * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. + * Each TTMx can only select one unit, but since + * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, + * we have some choices. + */ + if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { + unituse[6] = 1; /* Move 2 to 6 */ + unituse[2] = 0; + } + if (unituse[3] & (unituse[1] | unituse[2])) { + unituse[8] = 1; /* Move 3 to 8 */ + unituse[3] = 0; + unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); + } + /* Check only one unit per TTMx */ + if (unituse[1] + unituse[2] + unituse[3] > 1 || + unituse[4] + unituse[6] + unituse[7] > 1 || + unituse[8] + unituse[9] > 1 || + (unituse[5] | unituse[10] | unituse[11] | + unituse[13] | unituse[14])) + return -1; + + /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ + mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; + mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; + mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; + + /* Set TTCxSEL fields. */ + if (unitlower & 0xe) + mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; + if (unitlower & 0xf0) + mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; + if (unitlower & 0xf00) + mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; + if (unitlower & 0x7000) + mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; + + /* Set byte lane select fields. */ + for (byte = 0; byte < 4; ++byte) { + unit = busbyte[byte]; + if (!unit) + continue; + if (unit == 0xf) { + /* special case for GPS */ + mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); + } else { + if (!unituse[unit]) + ttm = unit - 1; /* 2->1, 3->2 */ + else + ttm = unit >> 2; + mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); + } + } + + /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ + for (i = 0; i < n_ev; ++i) { + pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; + unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; + byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; + psel = event[i] & PM_PMCSEL_MSK; + if (!pmc) { + /* Bus event or 00xxx direct event (off or cycles) */ + if (unit) + psel |= 0x10 | ((byte & 2) << 2); + for (pmc = 0; pmc < 8; ++pmc) { + if (pmc_inuse & (1 << pmc)) + continue; + grp = (pmc >> 1) & 1; + if (unit) { + if (grp == (byte & 1)) + break; + } else if (pmc_grp_use[grp] < 4) { + ++pmc_grp_use[grp]; + break; + } + } + pmc_inuse |= 1 << pmc; + } else { + /* Direct event */ + --pmc; + if (psel == 0 && (byte & 2)) + /* add events on higher-numbered bus */ + mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; + else if (psel == 6 && byte == 3) + /* seem to need to set sample_enable here */ + mmcra |= MMCRA_SAMPLE_ENABLE; + psel |= 8; + } + if (pmc <= 1) + mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); + else + mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); + if (pmc == 7) /* PMC8 */ + mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; + hwc[i] = pmc; + if (p4_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; + } + + if (pmc_inuse & 1) + mmcr0 |= MMCR0_PMC1CE; + if (pmc_inuse & 0xfe) + mmcr0 |= MMCR0_PMCjCE; + + mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ + + /* Return MMCRx values */ + mmcr[0] = mmcr0; + mmcr[1] = mmcr1; + mmcr[2] = mmcra; + return 0; +} + +static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) +{ + /* + * Setting the PMCxSEL field to 0 disables PMC x. + * (Note that pmc is 0-based here, not 1-based.) + */ + if (pmc <= 1) { + mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); + } else { + mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); + if (pmc == 7) + mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); + } +} + +static int p4_generic_events[] = { + [PERF_COUNT_CPU_CYCLES] = 7, + [PERF_COUNT_INSTRUCTIONS] = 0x1001, + [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ + [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ + [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ + [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ +}; + +struct power_pmu power4_pmu = { + .n_counter = 8, + .max_alternatives = 5, + .add_fields = 0x0000001100005555ull, + .test_adder = 0x0011083300000000ull, + .compute_mmcr = p4_compute_mmcr, + .get_constraint = p4_get_constraint, + .get_alternatives = p4_get_alternatives, + .disable_pmc = p4_disable_pmc, + .n_generic = ARRAY_SIZE(p4_generic_events), + .generic_events = p4_generic_events, +}; -- cgit v0.10.2 From 184fe4ab1f2e4dfa45584889bb3820031648386b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 8 Mar 2009 11:34:19 +0100 Subject: x86: perf_counter cleanup Use and actual unsigned long bitmap instead of casting our way around. Signed-off-by: Peter Zijlstra Cc: Jaswinder Singh Rajput LKML-Reference: <1236508459.22914.3645.camel@twins> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f585371..1df4210 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -37,7 +37,7 @@ struct cpu_hw_counters { unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; u64 throttle_ctrl; - u64 active_mask; + unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; }; @@ -291,7 +291,7 @@ static void pmc_amd_restore_all(u64 ctrl) return; for (idx = 0; idx < nr_counters_generic; idx++) { - if (test_bit(idx, (unsigned long *)&cpuc->active_mask)) { + if (test_bit(idx, cpuc->active_mask)) { u64 val; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); @@ -377,7 +377,7 @@ static void pmc_amd_enable(int idx, u64 config) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - set_bit(idx, (unsigned long *)&cpuc->active_mask); + set_bit(idx, cpuc->active_mask); if (cpuc->enabled) config |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -401,7 +401,7 @@ static void pmc_amd_disable(int idx, u64 config) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - clear_bit(idx, (unsigned long *)&cpuc->active_mask); + clear_bit(idx, cpuc->active_mask); wrmsrl(MSR_K7_EVNTSEL0 + idx, config); } -- cgit v0.10.2 From e255357764f92afcafafbd4879b222b8c752065a Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 8 Mar 2009 17:09:49 +0530 Subject: x86: perf_counter cleanup Remove unused variables and duplicate header file. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 1df4210..155bc3c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -17,7 +17,6 @@ #include #include -#include #include static bool perf_counters_initialized __read_mostly; @@ -954,9 +953,6 @@ static struct pmc_x86_ops *pmc_intel_init(void) static struct pmc_x86_ops *pmc_amd_init(void) { - u64 old; - int bits; - nr_counters_generic = 4; nr_counters_fixed = 0; counter_value_mask = 0x0000FFFFFFFFFFFFULL; -- cgit v0.10.2 From bc44fb5f7d3e764ed7698c835a1a0f35aba2eb3d Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 13 Mar 2009 10:42:18 +0100 Subject: x86, bts: detect size of DS fields Impact: more robust DS feature enumeration Detect the size of the pointer-type fields in the DS area configuration via the DTES64 features rather than based on the cpuid. Rename a variable to denote that size to reflect that it only covers the pointer-type fields. Add more boot-time diagnostics giving the detected size and the sizes of BTS and PEBS records. Use the size of the BTS/PEBS record to indicate that the respective feature is not available (if the record size is zero). Signed-off-by: Markus Metzger LKML-Reference: <20090313104218.A30096@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 87b67e3..6e5ec67 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -39,7 +39,7 @@ struct ds_configuration { /* the size of one pointer-typed field in the DS structure and in the BTS and PEBS buffers in bytes; this covers the first 8 DS fields related to buffer management. */ - unsigned char sizeof_field; + unsigned char sizeof_ptr_field; /* the size of a BTS/PEBS record in bytes */ unsigned char sizeof_rec[2]; /* a series of bit-masks to control various features indexed @@ -142,14 +142,14 @@ enum ds_qualifier { static inline unsigned long ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) { - base += (ds_cfg.sizeof_field * (field + (4 * qual))); + base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); return *(unsigned long *)base; } static inline void ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, unsigned long value) { - base += (ds_cfg.sizeof_field * (field + (4 * qual))); + base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); (*(unsigned long *)base) = value; } @@ -410,7 +410,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, * Later architectures use 64bit pointers throughout, whereas earlier * architectures use 32bit pointers in 32bit mode. * - * We compute the base address for the first 8 fields based on: + * We compute the base address for the fields based on: * - the field size stored in the DS configuration * - the relative field position * @@ -441,13 +441,13 @@ enum bts_field { static inline unsigned long bts_get(const char *base, enum bts_field field) { - base += (ds_cfg.sizeof_field * field); + base += (ds_cfg.sizeof_ptr_field * field); return *(unsigned long *)base; } static inline void bts_set(char *base, enum bts_field field, unsigned long val) { - base += (ds_cfg.sizeof_field * field);; + base += (ds_cfg.sizeof_ptr_field * field);; (*(unsigned long *)base) = val; } @@ -593,6 +593,10 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, struct ds_context *context; int error; + error = -EOPNOTSUPP; + if (!ds_cfg.sizeof_rec[qual]) + goto out; + error = -EINVAL; if (!base) goto out; @@ -635,10 +639,6 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, unsigned long irq; int error; - error = -EOPNOTSUPP; - if (!ds_cfg.ctl[dsf_bts]) - goto out; - /* buffer overflow notification is not yet implemented */ error = -EOPNOTSUPP; if (ovfl) @@ -848,7 +848,8 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); tracer->trace.reset_value = - *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); + *(u64 *)(tracer->ds.context->ds + + (ds_cfg.sizeof_ptr_field * 8)); return &tracer->trace; } @@ -884,7 +885,8 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) if (!tracer) return -EINVAL; - *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; + *(u64 *)(tracer->ds.context->ds + + (ds_cfg.sizeof_ptr_field * 8)) = value; return 0; } @@ -894,52 +896,54 @@ static const struct ds_configuration ds_cfg_netburst = { .ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_user] = (1 << 6), - - .sizeof_field = sizeof(long), - .sizeof_rec[ds_bts] = sizeof(long) * 3, -#ifdef __i386__ - .sizeof_rec[ds_pebs] = sizeof(long) * 10, -#else - .sizeof_rec[ds_pebs] = sizeof(long) * 18, -#endif }; static const struct ds_configuration ds_cfg_pentium_m = { .name = "Pentium M", .ctl[dsf_bts] = (1 << 6) | (1 << 7), - - .sizeof_field = sizeof(long), - .sizeof_rec[ds_bts] = sizeof(long) * 3, -#ifdef __i386__ - .sizeof_rec[ds_pebs] = sizeof(long) * 10, -#else - .sizeof_rec[ds_pebs] = sizeof(long) * 18, -#endif }; static const struct ds_configuration ds_cfg_core2_atom = { .name = "Core 2/Atom", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_user] = (1 << 10), - - .sizeof_field = 8, - .sizeof_rec[ds_bts] = 8 * 3, - .sizeof_rec[ds_pebs] = 8 * 18, }; static void -ds_configure(const struct ds_configuration *cfg) +ds_configure(const struct ds_configuration *cfg, + struct cpuinfo_x86 *cpu) { + unsigned long nr_pebs_fields = 0; + + printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); + +#ifdef __i386__ + nr_pebs_fields = 10; +#else + nr_pebs_fields = 18; +#endif + memset(&ds_cfg, 0, sizeof(ds_cfg)); ds_cfg = *cfg; - printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); + ds_cfg.sizeof_ptr_field = + (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); - if (!cpu_has_bts) { - ds_cfg.ctl[dsf_bts] = 0; + ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; + ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; + + if (!cpu_has(cpu, X86_FEATURE_BTS)) { + ds_cfg.sizeof_rec[ds_bts] = 0; printk(KERN_INFO "[ds] bts not available\n"); } - if (!cpu_has_pebs) + if (!cpu_has(cpu, X86_FEATURE_PEBS)) { + ds_cfg.sizeof_rec[ds_pebs] = 0; printk(KERN_INFO "[ds] pebs not available\n"); + } + + printk(KERN_INFO "[ds] sizes: address: %u bit, ", + 8 * ds_cfg.sizeof_ptr_field); + printk("bts/pebs record: %u/%u bytes\n", + ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); } @@ -951,12 +955,12 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x9: case 0xd: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m); + ds_configure(&ds_cfg_pentium_m, c); break; case 0xf: case 0x17: /* Core2 */ case 0x1c: /* Atom */ - ds_configure(&ds_cfg_core2_atom); + ds_configure(&ds_cfg_core2_atom, c); break; case 0x1a: /* i7 */ default: @@ -969,7 +973,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst); + ds_configure(&ds_cfg_netburst, c); break; default: /* sorry, don't know about them */ -- cgit v0.10.2 From 8a327f6d1b05f5ce16572b4413a5df1d0e872283 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 13 Mar 2009 10:45:07 +0100 Subject: x86, bts: add selftest for BTS Perform a selftest of branch trace store when a cpu is initialized. WARN and disable branch trace store support if the selftest fails. Signed-off-by: Markus Metzger LKML-Reference: <20090313104507.A30125@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index fdb45df..dfd74ab 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -175,6 +175,15 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. +config X86_DS_SELFTEST + bool "DS selftest" + default y + depends on DEBUG_KERNEL + depends on X86_DS + ---help--- + Perform Debug Store selftests at boot time. + If in doubt, say "N". + config HAVE_MMIOTRACE_SUPPORT def_bool y diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 339ce35..a0c9e13 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -44,6 +44,7 @@ obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o obj-$(CONFIG_X86_DS) += ds.o +obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 6e5ec67..51c936c 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -29,6 +29,7 @@ #include #include +#include "ds_selftest.h" /* * The configuration for a particular DS hardware implementation. @@ -940,6 +941,26 @@ ds_configure(const struct ds_configuration *cfg, printk(KERN_INFO "[ds] pebs not available\n"); } + if (ds_cfg.sizeof_rec[ds_bts]) { + int error; + + error = ds_selftest_bts(); + if (error) { + WARN(1, "[ds] selftest failed. disabling bts.\n"); + ds_cfg.sizeof_rec[ds_bts] = 0; + } + } + + if (ds_cfg.sizeof_rec[ds_pebs]) { + int error; + + error = ds_selftest_pebs(); + if (error) { + WARN(1, "[ds] selftest failed. disabling pebs.\n"); + ds_cfg.sizeof_rec[ds_pebs] = 0; + } + } + printk(KERN_INFO "[ds] sizes: address: %u bit, ", 8 * ds_cfg.sizeof_ptr_field); printk("bts/pebs record: %u/%u bytes\n", diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c new file mode 100644 index 0000000..8c46fbf --- /dev/null +++ b/arch/x86/kernel/ds_selftest.c @@ -0,0 +1,241 @@ +/* + * Debug Store support - selftest + * + * + * Copyright (C) 2009 Intel Corporation. + * Markus Metzger , 2009 + */ + +#include "ds_selftest.h" + +#include +#include + +#include + + +#define DS_SELFTEST_BUFFER_SIZE 1021 /* Intentionally chose an odd size. */ + + +static int ds_selftest_bts_consistency(const struct bts_trace *trace) +{ + int error = 0; + + if (!trace) { + printk(KERN_CONT "failed to access trace..."); + /* Bail out. Other tests are pointless. */ + return -1; + } + + if (!trace->read) { + printk(KERN_CONT "bts read not available..."); + error = -1; + } + + /* Do some sanity checks on the trace configuration. */ + if (!trace->ds.n) { + printk(KERN_CONT "empty bts buffer..."); + error = -1; + } + if (!trace->ds.size) { + printk(KERN_CONT "bad bts trace setup..."); + error = -1; + } + if (trace->ds.end != + (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { + printk(KERN_CONT "bad bts buffer setup..."); + error = -1; + } + if ((trace->ds.top < trace->ds.begin) || + (trace->ds.end <= trace->ds.top)) { + printk(KERN_CONT "bts top out of bounds..."); + error = -1; + } + + return error; +} + +static int ds_selftest_bts_read(struct bts_tracer *tracer, + const struct bts_trace *trace, + const void *from, const void *to) +{ + const unsigned char *at; + + /* + * Check a few things which do not belong to this test. + * They should be covered by other tests. + */ + if (!trace) + return -1; + + if (!trace->read) + return -1; + + if (to < from) + return -1; + + if (from < trace->ds.begin) + return -1; + + if (trace->ds.end < to) + return -1; + + if (!trace->ds.size) + return -1; + + /* Now to the test itself. */ + for (at = from; (void *)at < to; at += trace->ds.size) { + struct bts_struct bts; + size_t index; + int error; + + if (((void *)at - trace->ds.begin) % trace->ds.size) { + printk(KERN_CONT + "read from non-integer index..."); + return -1; + } + index = ((void *)at - trace->ds.begin) / trace->ds.size; + + memset(&bts, 0, sizeof(bts)); + error = trace->read(tracer, at, &bts); + if (error < 0) { + printk(KERN_CONT + "error reading bts trace at [%lu] (0x%p)...", + index, at); + return error; + } + + switch (bts.qualifier) { + case BTS_BRANCH: + break; + default: + printk(KERN_CONT + "unexpected bts entry %llu at [%lu] (0x%p)...", + bts.qualifier, index, at); + return -1; + } + } + + return 0; +} + +int ds_selftest_bts(void) +{ + const struct bts_trace *trace; + struct bts_tracer *tracer; + int error = 0; + void *top; + unsigned char buffer[DS_SELFTEST_BUFFER_SIZE]; + + printk(KERN_INFO "[ds] bts selftest..."); + + tracer = ds_request_bts(NULL, buffer, DS_SELFTEST_BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + if (IS_ERR(tracer)) { + error = PTR_ERR(tracer); + tracer = NULL; + + printk(KERN_CONT + "initialization failed (err: %d)...", error); + goto out; + } + + /* The return should already give us enough trace. */ + ds_suspend_bts(tracer); + + /* Let's see if we can access the trace. */ + trace = ds_read_bts(tracer); + + error = ds_selftest_bts_consistency(trace); + if (error < 0) + goto out; + + /* If everything went well, we should have a few trace entries. */ + if (trace->ds.top == trace->ds.begin) { + /* + * It is possible but highly unlikely that we got a + * buffer overflow and end up at exactly the same + * position we started from. + * Let's issue a warning, but continue. + */ + printk(KERN_CONT "no trace/overflow..."); + } + + /* Let's try to read the trace we collected. */ + error = ds_selftest_bts_read(tracer, trace, + trace->ds.begin, trace->ds.top); + if (error < 0) + goto out; + + /* + * Let's read the trace again. + * Since we suspended tracing, we should get the same result. + */ + top = trace->ds.top; + + trace = ds_read_bts(tracer); + error = ds_selftest_bts_consistency(trace); + if (error < 0) + goto out; + + if (top != trace->ds.top) { + printk(KERN_CONT "suspend not working..."); + error = -1; + goto out; + } + + /* Let's collect some more trace - see if resume is working. */ + ds_resume_bts(tracer); + ds_suspend_bts(tracer); + + trace = ds_read_bts(tracer); + + error = ds_selftest_bts_consistency(trace); + if (error < 0) + goto out; + + if (trace->ds.top == top) { + /* + * It is possible but highly unlikely that we got a + * buffer overflow and end up at exactly the same + * position we started from. + * Let's issue a warning and check the full trace. + */ + printk(KERN_CONT + "no resume progress/overflow..."); + + error = ds_selftest_bts_read(tracer, trace, + trace->ds.begin, trace->ds.end); + } else if (trace->ds.top < top) { + /* + * We had a buffer overflow - the entire buffer should + * contain trace records. + */ + error = ds_selftest_bts_read(tracer, trace, + trace->ds.begin, trace->ds.end); + } else { + /* + * It is quite likely that the buffer did not overflow. + * Let's just check the delta trace. + */ + error = ds_selftest_bts_read(tracer, trace, + top, trace->ds.top); + } + if (error < 0) + goto out; + + error = 0; + + /* The final test: release the tracer while tracing is suspended. */ + out: + ds_release_bts(tracer); + + printk(KERN_CONT "%s.\n", (error ? "failed" : "passed")); + + return error; +} + +int ds_selftest_pebs(void) +{ + return 0; +} diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h new file mode 100644 index 0000000..0e6e19d --- /dev/null +++ b/arch/x86/kernel/ds_selftest.h @@ -0,0 +1,15 @@ +/* + * Debug Store support - selftest + * + * + * Copyright (C) 2009 Intel Corporation. + * Markus Metzger , 2009 + */ + +#ifdef CONFIG_X86_DS_SELFTEST +extern int ds_selftest_bts(void); +extern int ds_selftest_pebs(void); +#else +static inline int ds_selftest_bts(void) { return 0; } +static inline int ds_selftest_pebs(void) { return 0; } +#endif /* CONFIG_X86_DS_SELFTEST */ -- cgit v0.10.2 From b8e47195451c5d3f62620b2b1b5928669afd56eb Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 13 Mar 2009 10:46:42 +0100 Subject: x86, bts: correct comment style in ds.c Correct the comment style in ds.c. Signed-off-by: Markus Metzger LKML-Reference: <20090313104642.A30149@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 51c936c..d9cab71 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -35,25 +35,22 @@ * The configuration for a particular DS hardware implementation. */ struct ds_configuration { - /* the name of the configuration */ + /* The name of the configuration. */ const char *name; - /* the size of one pointer-typed field in the DS structure and - in the BTS and PEBS buffers in bytes; - this covers the first 8 DS fields related to buffer management. */ + /* The size of pointer-typed fields in DS, BTS, and PEBS. */ unsigned char sizeof_ptr_field; - /* the size of a BTS/PEBS record in bytes */ + /* The size of a BTS/PEBS record in bytes. */ unsigned char sizeof_rec[2]; - /* a series of bit-masks to control various features indexed - * by enum ds_feature */ + /* Control bit-masks indexed by enum ds_feature. */ unsigned long ctl[dsf_ctl_max]; }; static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) -#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ -#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ -#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ +#define MAX_SIZEOF_DS (12 * 8) /* Maximal size of a DS configuration. */ +#define MAX_SIZEOF_BTS (3 * 8) /* Maximal size of a BTS record. */ +#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment. */ #define BTS_CONTROL \ (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ @@ -67,28 +64,28 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); * to identify tracers. */ struct ds_tracer { - /* the DS context (partially) owned by this tracer */ + /* The DS context (partially) owned by this tracer. */ struct ds_context *context; - /* the buffer provided on ds_request() and its size in bytes */ + /* The buffer provided on ds_request() and its size in bytes. */ void *buffer; size_t size; }; struct bts_tracer { - /* the common DS part */ + /* The common DS part. */ struct ds_tracer ds; - /* the trace including the DS configuration */ + /* The trace including the DS configuration. */ struct bts_trace trace; - /* buffer overflow notification function */ + /* Buffer overflow notification function. */ bts_ovfl_callback_t ovfl; }; struct pebs_tracer { - /* the common DS part */ + /* The common DS part. */ struct ds_tracer ds; - /* the trace including the DS configuration */ + /* The trace including the DS configuration. */ struct pebs_trace trace; - /* buffer overflow notification function */ + /* Buffer overflow notification function. */ pebs_ovfl_callback_t ovfl; }; @@ -214,18 +211,16 @@ static inline int check_tracer(struct task_struct *task) * deallocated when the last user puts the context. */ struct ds_context { - /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + /* The DS configuration; goes into MSR_IA32_DS_AREA. */ unsigned char ds[MAX_SIZEOF_DS]; - /* the owner of the BTS and PEBS configuration, respectively */ + /* The owner of the BTS and PEBS configuration, respectively. */ struct bts_tracer *bts_master; struct pebs_tracer *pebs_master; - /* use count */ + /* Use count. */ unsigned long count; - /* a pointer to the context location inside the thread_struct - * or the per_cpu context array */ + /* Pointer to the context pointer field. */ struct ds_context **this; - /* a pointer to the task owning this context, or NULL, if the - * context is owned by a cpu */ + /* The traced task; NULL for current cpu. */ struct task_struct *task; }; @@ -350,14 +345,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, unsigned long write_size, adj_write_size; /* - * write as much as possible without producing an + * Write as much as possible without producing an * overflow interrupt. * - * interrupt_threshold must either be + * Interrupt_threshold must either be * - bigger than absolute_maximum or * - point to a record between buffer_base and absolute_maximum * - * index points to a valid record. + * Index points to a valid record. */ base = ds_get(context->ds, qual, ds_buffer_base); index = ds_get(context->ds, qual, ds_index); @@ -366,8 +361,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, write_end = min(end, int_th); - /* if we are already beyond the interrupt threshold, - * we fill the entire buffer */ + /* + * If we are already beyond the interrupt threshold, + * we fill the entire buffer. + */ if (write_end <= index) write_end = end; @@ -384,7 +381,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; adj_write_size *= ds_cfg.sizeof_rec[qual]; - /* zero out trailing bytes */ + /* Zero out trailing bytes. */ memset((char *)index + write_size, 0, adj_write_size - write_size); index += adj_write_size; @@ -556,7 +553,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, unsigned int flags) { unsigned long buffer, adj; - /* adjust the buffer address and size to meet alignment + /* + * Adjust the buffer address and size to meet alignment * constraints: * - buffer is double-word aligned * - size is multiple of record size @@ -578,7 +576,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, trace->begin = (void *)buffer; trace->top = trace->begin; trace->end = (void *)(buffer + size); - /* The value for 'no threshold' is -1, which will set the + /* + * The value for 'no threshold' is -1, which will set the * threshold outside of the buffer, just like we want it. */ trace->ith = (void *)(buffer + size - ith); @@ -602,7 +601,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, if (!base) goto out; - /* we require some space to do alignment adjustments below */ + /* We require some space to do alignment adjustments below. */ error = -EINVAL; if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) goto out; @@ -640,7 +639,7 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, unsigned long irq; int error; - /* buffer overflow notification is not yet implemented */ + /* Buffer overflow notification is not yet implemented. */ error = -EOPNOTSUPP; if (ovfl) goto out; @@ -700,7 +699,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, unsigned long irq; int error; - /* buffer overflow notification is not yet implemented */ + /* Buffer overflow notification is not yet implemented. */ error = -EOPNOTSUPP; if (ovfl) goto out; @@ -983,9 +982,9 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) case 0x1c: /* Atom */ ds_configure(&ds_cfg_core2_atom, c); break; - case 0x1a: /* i7 */ + case 0x1a: /* Core i7 */ default: - /* sorry, don't know about them */ + /* Sorry, don't know about them. */ break; } break; @@ -997,12 +996,12 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) ds_configure(&ds_cfg_netburst, c); break; default: - /* sorry, don't know about them */ + /* Sorry, don't know about them. */ break; } break; default: - /* sorry, don't know about them */ + /* Sorry, don't know about them. */ break; } } -- cgit v0.10.2 From ba9372a8f306c4e53a5f61dcbcd6c1e4a8c2e9ac Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 13 Mar 2009 10:48:52 +0100 Subject: x86, hw-branch-tracer: keep resources on stop Distinguish init/reset and start/stop: init/reset will allocate and release bts tracing resources stop/start will suspend and resume bts tracing Return an error on init() if no cpu can be traced. Signed-off-by: Markus Metzger LKML-Reference: <20090313104852.A30168@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 7bfdf4c..a99a04c 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -19,7 +19,7 @@ #include "trace_output.h" -#define SIZEOF_BTS (1 << 13) +#define BTS_BUFFER_SIZE (1 << 13) /* * The tracer lock protects the below per-cpu tracer array. @@ -33,53 +33,68 @@ */ static DEFINE_SPINLOCK(bts_tracer_lock); static DEFINE_PER_CPU(struct bts_tracer *, tracer); -static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); +static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); #define this_tracer per_cpu(tracer, smp_processor_id()) #define this_buffer per_cpu(buffer, smp_processor_id()) -static int __read_mostly trace_hw_branches_enabled; +static int trace_hw_branches_enabled __read_mostly; +static int trace_hw_branches_suspended __read_mostly; static struct trace_array *hw_branch_trace __read_mostly; /* - * Start tracing on the current cpu. + * Initialize the tracer for the current cpu. * The argument is ignored. * * pre: bts_tracer_lock must be locked. */ -static void bts_trace_start_cpu(void *arg) +static void bts_trace_init_cpu(void *arg) { if (this_tracer) ds_release_bts(this_tracer); - this_tracer = - ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, - /* ovfl = */ NULL, /* th = */ (size_t)-1, - BTS_KERNEL); + this_tracer = ds_request_bts(NULL, this_buffer, BTS_BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); if (IS_ERR(this_tracer)) { this_tracer = NULL; return; } } -static void bts_trace_start(struct trace_array *tr) +static int bts_trace_init(struct trace_array *tr) { + int cpu, avail; + spin_lock(&bts_tracer_lock); - on_each_cpu(bts_trace_start_cpu, NULL, 1); - trace_hw_branches_enabled = 1; + hw_branch_trace = tr; + + on_each_cpu(bts_trace_init_cpu, NULL, 1); + + /* Check on how many cpus we could enable tracing */ + avail = 0; + for_each_online_cpu(cpu) + if (per_cpu(tracer, cpu)) + avail++; + + trace_hw_branches_enabled = (avail ? 1 : 0); + trace_hw_branches_suspended = 0; spin_unlock(&bts_tracer_lock); + + + /* If we could not enable tracing on a single cpu, we fail. */ + return avail ? 0 : -EOPNOTSUPP; } /* - * Stop tracing on the current cpu. + * Release the tracer for the current cpu. * The argument is ignored. * * pre: bts_tracer_lock must be locked. */ -static void bts_trace_stop_cpu(void *arg) +static void bts_trace_release_cpu(void *arg) { if (this_tracer) { ds_release_bts(this_tracer); @@ -87,12 +102,57 @@ static void bts_trace_stop_cpu(void *arg) } } -static void bts_trace_stop(struct trace_array *tr) +static void bts_trace_reset(struct trace_array *tr) { spin_lock(&bts_tracer_lock); + on_each_cpu(bts_trace_release_cpu, NULL, 1); trace_hw_branches_enabled = 0; - on_each_cpu(bts_trace_stop_cpu, NULL, 1); + trace_hw_branches_suspended = 0; + + spin_unlock(&bts_tracer_lock); +} + +/* + * Resume tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_lock must be locked. + */ +static void bts_trace_resume_cpu(void *arg) +{ + if (this_tracer) + ds_resume_bts(this_tracer); +} + +static void bts_trace_start(struct trace_array *tr) +{ + spin_lock(&bts_tracer_lock); + + on_each_cpu(bts_trace_resume_cpu, NULL, 1); + trace_hw_branches_suspended = 0; + + spin_unlock(&bts_tracer_lock); +} + +/* + * Suspend tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_lock must be locked. + */ +static void bts_trace_suspend_cpu(void *arg) +{ + if (this_tracer) + ds_suspend_bts(this_tracer); +} + +static void bts_trace_stop(struct trace_array *tr) +{ + spin_lock(&bts_tracer_lock); + + on_each_cpu(bts_trace_suspend_cpu, NULL, 1); + trace_hw_branches_suspended = 1; spin_unlock(&bts_tracer_lock); } @@ -110,10 +170,14 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: case CPU_DOWN_FAILED: - smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); + smp_call_function_single(cpu, bts_trace_init_cpu, NULL, 1); + + if (trace_hw_branches_suspended) + smp_call_function_single(cpu, bts_trace_suspend_cpu, + NULL, 1); break; case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); + smp_call_function_single(cpu, bts_trace_release_cpu, NULL, 1); break; } @@ -126,20 +190,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { .notifier_call = bts_hotcpu_handler }; -static int bts_trace_init(struct trace_array *tr) -{ - hw_branch_trace = tr; - - bts_trace_start(tr); - - return 0; -} - -static void bts_trace_reset(struct trace_array *tr) -{ - bts_trace_stop(tr); -} - static void bts_trace_print_header(struct seq_file *m) { seq_puts(m, "# CPU# TO <- FROM\n"); @@ -228,7 +278,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) */ static void trace_bts_cpu(void *arg) { - struct trace_array *tr = (struct trace_array *) arg; + struct trace_array *tr = (struct trace_array *)arg; const struct bts_trace *trace; unsigned char *at; @@ -276,7 +326,8 @@ void trace_hw_branch_oops(void) { spin_lock(&bts_tracer_lock); - trace_bts_cpu(hw_branch_trace); + if (trace_hw_branches_enabled) + trace_bts_cpu(hw_branch_trace); spin_unlock(&bts_tracer_lock); } -- cgit v0.10.2 From 321bb5e1ac461c04b6a93f795010d6eb01d8c5ca Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 13 Mar 2009 10:50:27 +0100 Subject: x86, hw-branch-tracer: add selftest Add a selftest for the hw-branch-tracer. Signed-off-by: Markus Metzger LKML-Reference: <20090313105027.A30183@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 56ce34d..e7fbc82 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -576,6 +576,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_hw_branches(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index a99a04c..4ca8270 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -342,7 +342,10 @@ struct tracer bts_tracer __read_mostly = .start = bts_trace_start, .stop = bts_trace_stop, .open = trace_bts_prepare, - .close = trace_bts_close + .close = trace_bts_close, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_hw_branches, +#endif /* CONFIG_FTRACE_SELFTEST */ }; __init static int init_bts_trace(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index f907a2b..3c7b797 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_BRANCH: case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: + case TRACE_HW_BRANCHES: return 1; } return 0; @@ -691,3 +692,55 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) return ret; } #endif /* CONFIG_BRANCH_TRACER */ + +#ifdef CONFIG_HW_BRANCH_TRACER +int +trace_selftest_startup_hw_branches(struct tracer *trace, + struct trace_array *tr) +{ + unsigned long count; + int ret; + struct trace_iterator iter; + struct tracer tracer; + + if (!trace->open) { + printk(KERN_CONT "missing open function..."); + return -1; + } + + ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + + /* + * The hw-branch tracer needs to collect the trace from the various + * cpu trace buffers - before tracing is stopped. + */ + memset(&iter, 0, sizeof(iter)); + memcpy(&tracer, trace, sizeof(tracer)); + + iter.trace = &tracer; + iter.tr = tr; + iter.pos = -1; + mutex_init(&iter.mutex); + + trace->open(&iter); + + mutex_destroy(&iter.mutex); + + tracing_stop(); + + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + tracing_start(); + + if (!ret && !count) { + printk(KERN_CONT "no entries found.."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_HW_BRANCH_TRACER */ -- cgit v0.10.2 From e9a22d1fb94050b7d600019c32e6b672d539054b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Mar 2009 11:54:40 +0100 Subject: x86, bts: cleanups Impact: cleanup, no code changed Cc: Markus Metzger LKML-Reference: <20090313104218.A30096@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d9cab71..7363e01 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -19,43 +19,52 @@ * Markus Metzger , 2007-2009 */ - -#include - -#include +#include #include -#include +#include #include +#include #include -#include + +#include #include "ds_selftest.h" /* - * The configuration for a particular DS hardware implementation. + * The configuration for a particular DS hardware implementation: */ struct ds_configuration { - /* The name of the configuration. */ - const char *name; - /* The size of pointer-typed fields in DS, BTS, and PEBS. */ - unsigned char sizeof_ptr_field; - /* The size of a BTS/PEBS record in bytes. */ - unsigned char sizeof_rec[2]; - /* Control bit-masks indexed by enum ds_feature. */ - unsigned long ctl[dsf_ctl_max]; + /* The name of the configuration: */ + const char *name; + + /* The size of pointer-typed fields in DS, BTS, and PEBS: */ + unsigned char sizeof_ptr_field; + + /* The size of a BTS/PEBS record in bytes: */ + unsigned char sizeof_rec[2]; + + /* Control bit-masks indexed by enum ds_feature: */ + unsigned long ctl[dsf_ctl_max]; }; static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) -#define MAX_SIZEOF_DS (12 * 8) /* Maximal size of a DS configuration. */ -#define MAX_SIZEOF_BTS (3 * 8) /* Maximal size of a BTS record. */ -#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment. */ +/* Maximal size of a DS configuration: */ +#define MAX_SIZEOF_DS (12 * 8) -#define BTS_CONTROL \ - (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ - ds_cfg.ctl[dsf_bts_overflow]) +/* Maximal size of a BTS record: */ +#define MAX_SIZEOF_BTS (3 * 8) +/* BTS and PEBS buffer alignment: */ +#define DS_ALIGNMENT (1 << 3) + +/* Mask of control bits in the DS MSR register: */ +#define BTS_CONTROL \ + ( ds_cfg.ctl[dsf_bts] | \ + ds_cfg.ctl[dsf_bts_kernel] | \ + ds_cfg.ctl[dsf_bts_user] | \ + ds_cfg.ctl[dsf_bts_overflow] ) /* * A BTS or PEBS tracer. @@ -65,28 +74,32 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); */ struct ds_tracer { /* The DS context (partially) owned by this tracer. */ - struct ds_context *context; + struct ds_context *context; /* The buffer provided on ds_request() and its size in bytes. */ - void *buffer; - size_t size; + void *buffer; + size_t size; }; struct bts_tracer { - /* The common DS part. */ - struct ds_tracer ds; - /* The trace including the DS configuration. */ - struct bts_trace trace; - /* Buffer overflow notification function. */ - bts_ovfl_callback_t ovfl; + /* The common DS part: */ + struct ds_tracer ds; + + /* The trace including the DS configuration: */ + struct bts_trace trace; + + /* Buffer overflow notification function: */ + bts_ovfl_callback_t ovfl; }; struct pebs_tracer { - /* The common DS part. */ - struct ds_tracer ds; - /* The trace including the DS configuration. */ - struct pebs_trace trace; - /* Buffer overflow notification function. */ - pebs_ovfl_callback_t ovfl; + /* The common DS part: */ + struct ds_tracer ds; + + /* The trace including the DS configuration: */ + struct pebs_trace trace; + + /* Buffer overflow notification function: */ + pebs_ovfl_callback_t ovfl; }; /* @@ -95,6 +108,7 @@ struct pebs_tracer { * * The DS configuration consists of the following fields; different * architetures vary in the size of those fields. + * * - double-word aligned base linear address of the BTS buffer * - write pointer into the BTS buffer * - end linear address of the BTS buffer (one byte beyond the end of @@ -133,19 +147,20 @@ enum ds_field { }; enum ds_qualifier { - ds_bts = 0, + ds_bts = 0, ds_pebs }; -static inline unsigned long ds_get(const unsigned char *base, - enum ds_qualifier qual, enum ds_field field) +static inline unsigned long +ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) { base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); return *(unsigned long *)base; } -static inline void ds_set(unsigned char *base, enum ds_qualifier qual, - enum ds_field field, unsigned long value) +static inline void +ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, + unsigned long value) { base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); (*(unsigned long *)base) = value; @@ -157,7 +172,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, */ static DEFINE_SPINLOCK(ds_lock); - /* * We either support (system-wide) per-cpu or per-thread allocation. * We distinguish the two based on the task_struct pointer, where a @@ -211,17 +225,21 @@ static inline int check_tracer(struct task_struct *task) * deallocated when the last user puts the context. */ struct ds_context { - /* The DS configuration; goes into MSR_IA32_DS_AREA. */ - unsigned char ds[MAX_SIZEOF_DS]; - /* The owner of the BTS and PEBS configuration, respectively. */ - struct bts_tracer *bts_master; - struct pebs_tracer *pebs_master; - /* Use count. */ + /* The DS configuration; goes into MSR_IA32_DS_AREA: */ + unsigned char ds[MAX_SIZEOF_DS]; + + /* The owner of the BTS and PEBS configuration, respectively: */ + struct bts_tracer *bts_master; + struct pebs_tracer *pebs_master; + + /* Use count: */ unsigned long count; - /* Pointer to the context pointer field. */ - struct ds_context **this; - /* The traced task; NULL for current cpu. */ - struct task_struct *task; + + /* Pointer to the context pointer field: */ + struct ds_context **this; + + /* The traced task; NULL for current cpu: */ + struct task_struct *task; }; static DEFINE_PER_CPU(struct ds_context *, system_context_array); @@ -328,9 +346,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) * The remainder of any partially written record is zeroed out. * * context: the DS context - * qual: the buffer type - * record: the data to write - * size: the size of the data + * qual: the buffer type + * record: the data to write + * size: the size of the data */ static int ds_write(struct ds_context *context, enum ds_qualifier qual, const void *record, size_t size) @@ -429,12 +447,12 @@ enum bts_field { bts_to, bts_flags, - bts_qual = bts_from, - bts_jiffies = bts_to, - bts_pid = bts_flags, + bts_qual = bts_from, + bts_jiffies = bts_to, + bts_pid = bts_flags, - bts_qual_mask = (bts_qual_max - 1), - bts_escape = ((unsigned long)-1 & ~bts_qual_mask) + bts_qual_mask = (bts_qual_max - 1), + bts_escape = ((unsigned long)-1 & ~bts_qual_mask) }; static inline unsigned long bts_get(const char *base, enum bts_field field) @@ -461,8 +479,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val) * * return: bytes read/written on success; -Eerrno, otherwise */ -static int bts_read(struct bts_tracer *tracer, const void *at, - struct bts_struct *out) +static int +bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) { if (!tracer) return -EINVAL; diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h index 0e6e19d..2ba8745 100644 --- a/arch/x86/kernel/ds_selftest.h +++ b/arch/x86/kernel/ds_selftest.h @@ -12,4 +12,4 @@ extern int ds_selftest_pebs(void); #else static inline int ds_selftest_bts(void) { return 0; } static inline int ds_selftest_pebs(void) { return 0; } -#endif /* CONFIG_X86_DS_SELFTEST */ +#endif diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 4ca8270..8b2109a 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -1,5 +1,5 @@ /* - * h/w branch tracer for x86 based on bts + * h/w branch tracer for x86 based on BTS * * Copyright (C) 2008-2009 Intel Corporation. * Markus Metzger , 2008-2009 @@ -15,8 +15,8 @@ #include -#include "trace.h" #include "trace_output.h" +#include "trace.h" #define BTS_BUFFER_SIZE (1 << 13) @@ -197,10 +197,10 @@ static void bts_trace_print_header(struct seq_file *m) static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) { + unsigned long symflags = TRACE_ITER_SYM_OFFSET; struct trace_entry *entry = iter->ent; struct trace_seq *seq = &iter->seq; struct hw_branch_entry *it; - unsigned long symflags = TRACE_ITER_SYM_OFFSET; trace_assign_type(it, entry); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 3c7b797..b910912 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -189,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, #else # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) #endif /* CONFIG_DYNAMIC_FTRACE */ + /* * Simple verification test of ftrace function tracer. * Enable ftrace, sleep 1/10 second, and then read the trace @@ -698,10 +699,10 @@ int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr) { - unsigned long count; - int ret; struct trace_iterator iter; struct tracer tracer; + unsigned long count; + int ret; if (!trace->open) { printk(KERN_CONT "missing open function..."); -- cgit v0.10.2 From 79258a354e0c69be94ae2871809a195bf4a647b1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Mar 2009 12:02:08 +0100 Subject: x86, bts: detect size of DS fields, fix Impact: build fix One usage site was missed in the sizeof_field -> sizeof_ptr_field rename. Cc: Markus Metzger LKML-Reference: <20090313104218.A30096@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 7363e01..5fd5333 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -983,7 +983,7 @@ ds_configure(const struct ds_configuration *cfg, printk("bts/pebs record: %u/%u bytes\n", ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); - WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); + WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field)); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) -- cgit v0.10.2 From c78a3956b982418186e40978a51636a2b43221bc Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Wed, 18 Mar 2009 19:27:00 +0100 Subject: x86, bts: use atomic memory allocation Ds_request_bts() needs to allocate memory. It uses GFP_KERNEL. Hw-branch-tracer calls ds_request_bts() within on_each_cpu(). Use atomic memory allocation to allow it to be used in that context. Signed-off-by: Markus Metzger LKML-Reference: <20090318192700.A6038@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 5fd5333..b1d6e1f 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -255,8 +255,13 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) struct ds_context *new_context = NULL; unsigned long irq; - /* Chances are small that we already have a context. */ - new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); + /* + * Chances are small that we already have a context. + * + * Contexts for per-cpu tracing are allocated using + * smp_call_function(). We must not sleep. + */ + new_context = kzalloc(sizeof(*new_context), GFP_ATOMIC); if (!new_context) return NULL; @@ -662,8 +667,12 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, if (ovfl) goto out; + /* + * Per-cpu tracing is typically requested using smp_call_function(). + * We must not sleep. + */ error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); if (!tracer) goto out; tracer->ovfl = ovfl; @@ -722,8 +731,12 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, if (ovfl) goto out; + /* + * Per-cpu tracing is typically requested using smp_call_function(). + * We must not sleep. + */ error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); if (!tracer) goto out; tracer->ovfl = ovfl; -- cgit v0.10.2 From 425480081e936d8725f0d44b8829d699bf088c6b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 13:38:36 -0400 Subject: tracing: add handler to trace_stat Currently, if a trace_stat user wants a handle to some private data, the trace_stat infrastructure does not supply a way to do that. This patch passes the trace_stat structure to the start function of the trace_stat code. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index ad8c22e..e6e3291 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -263,7 +263,7 @@ static int branch_stat_show(struct seq_file *m, void *v) return 0; } -static void *annotated_branch_stat_start(void) +static void *annotated_branch_stat_start(struct tracer_stat *trace) { return __start_annotated_branch_profile; } @@ -338,7 +338,7 @@ static int all_branch_stat_headers(struct seq_file *m) return 0; } -static void *all_branch_stat_start(void) +static void *all_branch_stat_start(struct tracer_stat *trace) { return __start_branch_profile; } diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index f71b85b..f8f48d8 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -85,7 +85,7 @@ static int stat_seq_init(struct tracer_stat_session *session) if (!ts->stat_cmp) ts->stat_cmp = dummy_cmp; - stat = ts->stat_start(); + stat = ts->stat_start(ts); if (!stat) goto exit; diff --git a/kernel/trace/trace_stat.h b/kernel/trace/trace_stat.h index 202274c..f3546a2 100644 --- a/kernel/trace/trace_stat.h +++ b/kernel/trace/trace_stat.h @@ -12,7 +12,7 @@ struct tracer_stat { /* The name of your stat file */ const char *name; /* Iteration over statistic entries */ - void *(*stat_start)(void); + void *(*stat_start)(struct tracer_stat *trace); void *(*stat_next)(void *prev, int idx); /* Compare two entries for stats sorting */ int (*stat_cmp)(void *p1, void *p2); diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index 9ab035b..ee533c2 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -152,7 +152,7 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu) return ret; } -static void *workqueue_stat_start(void) +static void *workqueue_stat_start(struct tracer_stat *trace) { int cpu; void *ret = NULL; -- cgit v0.10.2 From bac429f037f1a51a74d62bad6d1518c3be065df3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 20 Mar 2009 12:50:56 -0400 Subject: tracing: add function profiler Impact: new profiling feature This patch adds a function profiler. In debugfs/tracing/ two new files are created. function_profile_enabled - to enable or disable profiling trace_stat/functions - the profiled functions. For example: echo 1 > /debugfs/tracing/function_profile_enabled ./hackbench 50 echo 0 > /debugfs/tracing/function_profile_enabled yields: cat /debugfs/tracing/trace_stat/functions Function Hit -------- --- _spin_lock 10106442 _spin_unlock 10097492 kfree 6013704 _spin_unlock_irqrestore 4423941 _spin_lock_irqsave 4406825 __phys_addr 4181686 __slab_free 4038222 dput 4030130 path_put 4023387 unroll_tree_refs 4019532 [...] The most hit functions are listed first. Functions that are not hit are not listed. This feature depends on and uses dynamic function tracing. When the function profiling is disabled, no overhead occurs. But it still takes up around 300KB to hold the data, thus it is not recomended to keep it enabled for systems low on memory. When a '1' is echoed into the function_profile_enabled file, the counters for is function is reset back to zero. Thus you can see what functions are hit most by different programs. Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d2..0456c3a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,6 +153,10 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; +#ifdef CONFIG_FUNCTION_PROFILER + unsigned long counter; + struct hlist_node node; +#endif struct dyn_arch_ftrace arch; }; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8a4d729..95e9ad5 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -105,6 +105,7 @@ config FUNCTION_GRAPH_TRACER This is done by setting the current return address on the current task structure into a stack of calls. + config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" default n @@ -376,6 +377,24 @@ config DYNAMIC_FTRACE were made. If so, it runs stop_machine (stops all CPUS) and modifies the code to jump over the call to ftrace. +config FUNCTION_PROFILER + bool "Kernel function profiler" + depends on DYNAMIC_FTRACE + default n + help + This option enables the kernel function profiler. When the dynamic + function tracing is enabled, a counter is added into the function + records used by the dynamic function tracer. A file is created in + debugfs called function_profile_enabled which defaults to zero. + When a 1 is echoed into this file profiling begins, and when a + zero is entered, profiling stops. A file in the trace_stats + directory called functions, that show the list of functions that + have been hit and their counters. + + This takes up around 320K more memory. + + If in doubt, say N + config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7b8722b..11f364c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -34,6 +34,7 @@ #include #include "trace.h" +#include "trace_stat.h" #define FTRACE_WARN_ON(cond) \ do { \ @@ -261,7 +262,6 @@ struct ftrace_func_probe { struct rcu_head rcu; }; - enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -309,6 +309,307 @@ static struct dyn_ftrace *ftrace_free_records; } \ } +#ifdef CONFIG_FUNCTION_PROFILER +static struct hlist_head *ftrace_profile_hash; +static int ftrace_profile_bits; +static int ftrace_profile_enabled; +static DEFINE_MUTEX(ftrace_profile_lock); + +static void * +function_stat_next(void *v, int idx) +{ + struct dyn_ftrace *rec = v; + struct ftrace_page *pg; + + pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK); + + again: + rec++; + if ((void *)rec >= (void *)&pg->records[pg->index]) { + pg = pg->next; + if (!pg) + return NULL; + rec = &pg->records[0]; + } + + if (rec->flags & FTRACE_FL_FREE || + rec->flags & FTRACE_FL_FAILED || + !(rec->flags & FTRACE_FL_CONVERTED) || + /* ignore non hit functions */ + !rec->counter) + goto again; + + return rec; +} + +static void *function_stat_start(struct tracer_stat *trace) +{ + return function_stat_next(&ftrace_pages_start->records[0], 0); +} + +static int function_stat_cmp(void *p1, void *p2) +{ + struct dyn_ftrace *a = p1; + struct dyn_ftrace *b = p2; + + if (a->counter < b->counter) + return -1; + if (a->counter > b->counter) + return 1; + else + return 0; +} + +static int function_stat_headers(struct seq_file *m) +{ + seq_printf(m, " Function Hit\n" + " -------- ---\n"); + return 0; +} + +static int function_stat_show(struct seq_file *m, void *v) +{ + struct dyn_ftrace *rec = v; + char str[KSYM_SYMBOL_LEN]; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + + seq_printf(m, " %-30.30s %10lu\n", str, rec->counter); + return 0; +} + +static struct tracer_stat function_stats = { + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show +}; + +static void ftrace_profile_init(int nr_funcs) +{ + unsigned long addr; + int order; + int size; + + /* + * We are profiling all functions, lets make it 1/4th of the + * number of functions that are in core kernel. So we have to + * iterate 4 times. + */ + order = (sizeof(struct hlist_head) * nr_funcs) / 4; + order = get_order(order); + size = 1 << (PAGE_SHIFT + order); + + pr_info("Allocating %d KB for profiler hash\n", size >> 10); + + addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!addr) { + pr_warning("Could not allocate function profiler hash\n"); + return; + } + + ftrace_profile_hash = (void *)addr; + + /* + * struct hlist_head should be a pointer of 4 or 8 bytes. + * And a simple bit manipulation can be done, but if for + * some reason struct hlist_head is not a mulitple of 2, + * then we play it safe, and simply count. This function + * is done once at boot up, so it is not that critical in + * performance. + */ + + size--; + size /= sizeof(struct hlist_head); + + for (; size; size >>= 1) + ftrace_profile_bits++; + + pr_info("Function profiler has %d hash buckets\n", + 1 << ftrace_profile_bits); + + return; +} + +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static void ftrace_profile_reset(void) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + do_for_each_ftrace_rec(pg, rec) { + rec->counter = 0; + } while_for_each_ftrace_rec(); +} + +static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip) +{ + struct dyn_ftrace *rec; + struct hlist_head *hhd; + struct hlist_node *n; + unsigned long flags; + unsigned long key; + + if (!ftrace_profile_hash) + return NULL; + + key = hash_long(ip, ftrace_profile_bits); + hhd = &ftrace_profile_hash[key]; + + if (hlist_empty(hhd)) + return NULL; + + local_irq_save(flags); + hlist_for_each_entry_rcu(rec, n, hhd, node) { + if (rec->ip == ip) + goto out; + } + rec = NULL; + out: + local_irq_restore(flags); + + return rec; +} + +static void +function_profile_call(unsigned long ip, unsigned long parent_ip) +{ + struct dyn_ftrace *rec; + unsigned long flags; + + if (!ftrace_profile_enabled) + return; + + local_irq_save(flags); + rec = ftrace_find_profiled_func(ip); + if (!rec) + goto out; + + rec->counter++; + out: + local_irq_restore(flags); +} + +static struct ftrace_ops ftrace_profile_ops __read_mostly = +{ + .func = function_profile_call, +}; + +static ssize_t +ftrace_profile_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long val; + char buf[64]; + int ret; + + if (!ftrace_profile_hash) { + pr_info("Can not enable hash due to earlier problems\n"); + return -ENODEV; + } + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = strict_strtoul(buf, 10, &val); + if (ret < 0) + return ret; + + val = !!val; + + mutex_lock(&ftrace_profile_lock); + if (ftrace_profile_enabled ^ val) { + if (val) { + ftrace_profile_reset(); + register_ftrace_function(&ftrace_profile_ops); + ftrace_profile_enabled = 1; + } else { + ftrace_profile_enabled = 0; + unregister_ftrace_function(&ftrace_profile_ops); + } + } + mutex_unlock(&ftrace_profile_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static const struct file_operations ftrace_profile_fops = { + .open = tracing_open_generic, + .read = ftrace_profile_read, + .write = ftrace_profile_write, +}; + +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ + struct dentry *entry; + int ret; + + ret = register_stat_tracer(&function_stats); + if (ret) { + pr_warning("Warning: could not register " + "function stats\n"); + return; + } + + entry = debugfs_create_file("function_profile_enabled", 0644, + d_tracer, NULL, &ftrace_profile_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'function_profile_enabled' entry\n"); +} + +static void ftrace_add_profile(struct dyn_ftrace *rec) +{ + unsigned long key; + + if (!ftrace_profile_hash) + return; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); +} + +static void ftrace_profile_release(struct dyn_ftrace *rec) +{ + mutex_lock(&ftrace_profile_lock); + hlist_del(&rec->node); + mutex_unlock(&ftrace_profile_lock); +} + +#else /* CONFIG_FUNCTION_PROFILER */ +static void ftrace_profile_init(int nr_funcs) +{ +} +static void ftrace_add_profile(struct dyn_ftrace *rec) +{ +} +static void ftrace_profile_debugfs(struct dentry *d_tracer) +{ +} +static void ftrace_profile_release(struct dyn_ftrace *rec) +{ +} +#endif /* CONFIG_FUNCTION_PROFILER */ + #ifdef CONFIG_KPROBES static int frozen_record_count; @@ -359,8 +660,10 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) + !(rec->flags & FTRACE_FL_FREE)) { ftrace_free_rec(rec); + ftrace_profile_release(rec); + } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -414,6 +717,8 @@ ftrace_record_ip(unsigned long ip) rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; + ftrace_add_profile(rec); + return rec; } @@ -2157,6 +2462,8 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + ftrace_profile_debugfs(d_tracer); + return 0; } @@ -2225,6 +2532,8 @@ void __init ftrace_init(void) if (ret) goto failed; + ftrace_profile_init(count); + last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_convert_nops(NULL, -- cgit v0.10.2 From 493762fc534c71d11d489f872c4b4a2c61173668 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 17:12:36 -0400 Subject: tracing: move function profiler data out of function struct Impact: reduce size of memory in function profiler The function profiler originally introduces its counters into the function records itself. There is 20 thousand different functions on a normal system, and that is adding 20 thousand counters for profiling event when not needed. A normal run of the profiler yields only a couple of thousand functions executed, depending on what is being profiled. This means we have around 18 thousand useless counters. This patch rectifies this by moving the data out of the function records used by dynamic ftrace. Data is preallocated to hold the functions when the profiling begins. Checks are made during profiling to see if more recorcds should be allocated, and they are allocated if it is safe to do so. This also removes the dependency from using dynamic ftrace, and also removes the overhead by having it enabled. Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0456c3a..015a3d2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -153,10 +153,6 @@ struct dyn_ftrace { unsigned long flags; struct dyn_ftrace *newlist; }; -#ifdef CONFIG_FUNCTION_PROFILER - unsigned long counter; - struct hlist_node node; -#endif struct dyn_arch_ftrace arch; }; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 95e9ad5..8a41360 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -379,20 +379,16 @@ config DYNAMIC_FTRACE config FUNCTION_PROFILER bool "Kernel function profiler" - depends on DYNAMIC_FTRACE + depends on FUNCTION_TRACER default n help - This option enables the kernel function profiler. When the dynamic - function tracing is enabled, a counter is added into the function - records used by the dynamic function tracer. A file is created in - debugfs called function_profile_enabled which defaults to zero. + This option enables the kernel function profiler. A file is created + in debugfs called function_profile_enabled which defaults to zero. When a 1 is echoed into this file profiling begins, and when a zero is entered, profiling stops. A file in the trace_stats directory called functions, that show the list of functions that have been hit and their counters. - This takes up around 320K more memory. - If in doubt, say N config FTRACE_MCOUNT_RECORD diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 11f364c..24dac44 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -241,87 +241,48 @@ static void ftrace_update_pid_func(void) #endif } -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; -static struct pid * const ftrace_swapper_pid = &init_struct_pid; - -#ifdef CONFIG_DYNAMIC_FTRACE - -#ifndef CONFIG_FTRACE_MCOUNT_RECORD -# error Dynamic ftrace depends on MCOUNT_RECORD -#endif - -static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; - -struct ftrace_func_probe { - struct hlist_node node; - struct ftrace_probe_ops *ops; - unsigned long flags; - unsigned long ip; - void *data; - struct rcu_head rcu; +#ifdef CONFIG_FUNCTION_PROFILER +struct ftrace_profile { + struct hlist_node node; + unsigned long ip; + unsigned long counter; }; -enum { - FTRACE_ENABLE_CALLS = (1 << 0), - FTRACE_DISABLE_CALLS = (1 << 1), - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_ENABLE_MCOUNT = (1 << 3), - FTRACE_DISABLE_MCOUNT = (1 << 4), - FTRACE_START_FUNC_RET = (1 << 5), - FTRACE_STOP_FUNC_RET = (1 << 6), +struct ftrace_profile_page { + struct ftrace_profile_page *next; + unsigned long index; + struct ftrace_profile records[]; }; -static int ftrace_filtered; +#define PROFILE_RECORDS_SIZE \ + (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) -static struct dyn_ftrace *ftrace_new_addrs; +#define PROFILES_PER_PAGE \ + (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -static DEFINE_MUTEX(ftrace_regex_lock); - -struct ftrace_page { - struct ftrace_page *next; - int index; - struct dyn_ftrace records[]; -}; +/* TODO: make these percpu, to prevent cache line bouncing */ +static struct ftrace_profile_page *profile_pages_start; +static struct ftrace_profile_page *profile_pages; -#define ENTRIES_PER_PAGE \ - ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) - -/* estimate from running different kernels */ -#define NR_TO_INIT 10000 - -static struct ftrace_page *ftrace_pages_start; -static struct ftrace_page *ftrace_pages; - -static struct dyn_ftrace *ftrace_free_records; - -/* - * This is a double for. Do not use 'break' to break out of the loop, - * you must use a goto. - */ -#define do_for_each_ftrace_rec(pg, rec) \ - for (pg = ftrace_pages_start; pg; pg = pg->next) { \ - int _____i; \ - for (_____i = 0; _____i < pg->index; _____i++) { \ - rec = &pg->records[_____i]; - -#define while_for_each_ftrace_rec() \ - } \ - } - -#ifdef CONFIG_FUNCTION_PROFILER static struct hlist_head *ftrace_profile_hash; static int ftrace_profile_bits; static int ftrace_profile_enabled; static DEFINE_MUTEX(ftrace_profile_lock); +static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); + +#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ + +static raw_spinlock_t ftrace_profile_rec_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static void * function_stat_next(void *v, int idx) { - struct dyn_ftrace *rec = v; - struct ftrace_page *pg; + struct ftrace_profile *rec = v; + struct ftrace_profile_page *pg; - pg = (struct ftrace_page *)((unsigned long)rec & PAGE_MASK); + pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); again: rec++; @@ -330,27 +291,22 @@ function_stat_next(void *v, int idx) if (!pg) return NULL; rec = &pg->records[0]; + if (!rec->counter) + goto again; } - if (rec->flags & FTRACE_FL_FREE || - rec->flags & FTRACE_FL_FAILED || - !(rec->flags & FTRACE_FL_CONVERTED) || - /* ignore non hit functions */ - !rec->counter) - goto again; - return rec; } static void *function_stat_start(struct tracer_stat *trace) { - return function_stat_next(&ftrace_pages_start->records[0], 0); + return function_stat_next(&profile_pages_start->records[0], 0); } static int function_stat_cmp(void *p1, void *p2) { - struct dyn_ftrace *a = p1; - struct dyn_ftrace *b = p2; + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; if (a->counter < b->counter) return -1; @@ -369,7 +325,7 @@ static int function_stat_headers(struct seq_file *m) static int function_stat_show(struct seq_file *m, void *v) { - struct dyn_ftrace *rec = v; + struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -387,115 +343,191 @@ static struct tracer_stat function_stats = { .stat_show = function_stat_show }; -static void ftrace_profile_init(int nr_funcs) +static void ftrace_profile_reset(void) { - unsigned long addr; - int order; - int size; + struct ftrace_profile_page *pg; - /* - * We are profiling all functions, lets make it 1/4th of the - * number of functions that are in core kernel. So we have to - * iterate 4 times. - */ - order = (sizeof(struct hlist_head) * nr_funcs) / 4; - order = get_order(order); - size = 1 << (PAGE_SHIFT + order); - - pr_info("Allocating %d KB for profiler hash\n", size >> 10); + pg = profile_pages = profile_pages_start; - addr = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); - if (!addr) { - pr_warning("Could not allocate function profiler hash\n"); - return; + while (pg) { + memset(pg->records, 0, PROFILE_RECORDS_SIZE); + pg->index = 0; + pg = pg->next; } - ftrace_profile_hash = (void *)addr; + memset(ftrace_profile_hash, 0, + FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); +} - /* - * struct hlist_head should be a pointer of 4 or 8 bytes. - * And a simple bit manipulation can be done, but if for - * some reason struct hlist_head is not a mulitple of 2, - * then we play it safe, and simply count. This function - * is done once at boot up, so it is not that critical in - * performance. - */ +int ftrace_profile_pages_init(void) +{ + struct ftrace_profile_page *pg; + int i; - size--; - size /= sizeof(struct hlist_head); + /* If we already allocated, do nothing */ + if (profile_pages) + return 0; - for (; size; size >>= 1) - ftrace_profile_bits++; + profile_pages = (void *)get_zeroed_page(GFP_KERNEL); + if (!profile_pages) + return -ENOMEM; - pr_info("Function profiler has %d hash buckets\n", - 1 << ftrace_profile_bits); + pg = profile_pages_start = profile_pages; - return; + /* allocate 10 more pages to start */ + for (i = 0; i < 10; i++) { + pg->next = (void *)get_zeroed_page(GFP_KERNEL); + /* + * We only care about allocating profile_pages, if + * we failed to allocate here, hopefully we will allocate + * later. + */ + if (!pg->next) + break; + pg = pg->next; + } + + return 0; } -static ssize_t -ftrace_profile_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) +static int ftrace_profile_init(void) { - char buf[64]; - int r; + int size; - r = sprintf(buf, "%u\n", ftrace_profile_enabled); - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} + if (ftrace_profile_hash) { + /* If the profile is already created, simply reset it */ + ftrace_profile_reset(); + return 0; + } -static void ftrace_profile_reset(void) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; + /* + * We are profiling all functions, but usually only a few thousand + * functions are hit. We'll make a hash of 1024 items. + */ + size = FTRACE_PROFILE_HASH_SIZE; - do_for_each_ftrace_rec(pg, rec) { - rec->counter = 0; - } while_for_each_ftrace_rec(); + ftrace_profile_hash = + kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + + if (!ftrace_profile_hash) + return -ENOMEM; + + size--; + + for (; size; size >>= 1) + ftrace_profile_bits++; + + /* Preallocate a few pages */ + if (ftrace_profile_pages_init() < 0) { + kfree(ftrace_profile_hash); + ftrace_profile_hash = NULL; + return -ENOMEM; + } + + return 0; } -static struct dyn_ftrace *ftrace_find_profiled_func(unsigned long ip) +/* interrupts must be disabled */ +static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; struct hlist_head *hhd; struct hlist_node *n; - unsigned long flags; unsigned long key; - if (!ftrace_profile_hash) - return NULL; - key = hash_long(ip, ftrace_profile_bits); hhd = &ftrace_profile_hash[key]; if (hlist_empty(hhd)) return NULL; - local_irq_save(flags); hlist_for_each_entry_rcu(rec, n, hhd, node) { if (rec->ip == ip) - goto out; + return rec; + } + + return NULL; +} + +static void ftrace_add_profile(struct ftrace_profile *rec) +{ + unsigned long key; + + key = hash_long(rec->ip, ftrace_profile_bits); + hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); +} + +/* Interrupts must be disabled calling this */ +static struct ftrace_profile * +ftrace_profile_alloc(unsigned long ip, bool alloc_safe) +{ + struct ftrace_profile *rec = NULL; + + /* prevent recursion */ + if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) + goto out; + + __raw_spin_lock(&ftrace_profile_rec_lock); + + /* Try to always keep another page available */ + if (!profile_pages->next && alloc_safe) + profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); + + /* + * Try to find the function again since another + * task on another CPU could have added it + */ + rec = ftrace_find_profiled_func(ip); + if (rec) + goto out_unlock; + + if (profile_pages->index == PROFILES_PER_PAGE) { + if (!profile_pages->next) + goto out_unlock; + profile_pages = profile_pages->next; } - rec = NULL; + + rec = &profile_pages->records[profile_pages->index++]; + rec->ip = ip; + ftrace_add_profile(rec); + + out_unlock: + __raw_spin_unlock(&ftrace_profile_rec_lock); out: - local_irq_restore(flags); + atomic_dec(&__get_cpu_var(ftrace_profile_disable)); return rec; } +/* + * If we are not in an interrupt, or softirq and + * and interrupts are disabled and preemption is not enabled + * (not in a spinlock) then it should be safe to allocate memory. + */ +static bool ftrace_safe_to_allocate(void) +{ + return !in_interrupt() && irqs_disabled() && !preempt_count(); +} + static void function_profile_call(unsigned long ip, unsigned long parent_ip) { - struct dyn_ftrace *rec; + struct ftrace_profile *rec; unsigned long flags; + bool alloc_safe; if (!ftrace_profile_enabled) return; + alloc_safe = ftrace_safe_to_allocate(); + local_irq_save(flags); rec = ftrace_find_profiled_func(ip); - if (!rec) - goto out; + if (!rec) { + rec = ftrace_profile_alloc(ip, alloc_safe); + if (!rec) + goto out; + } rec->counter++; out: @@ -515,11 +547,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, char buf[64]; int ret; - if (!ftrace_profile_hash) { - pr_info("Can not enable hash due to earlier problems\n"); - return -ENODEV; - } - if (cnt >= sizeof(buf)) return -EINVAL; @@ -537,7 +564,12 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, mutex_lock(&ftrace_profile_lock); if (ftrace_profile_enabled ^ val) { if (val) { - ftrace_profile_reset(); + ret = ftrace_profile_init(); + if (ret < 0) { + cnt = ret; + goto out; + } + register_ftrace_function(&ftrace_profile_ops); ftrace_profile_enabled = 1; } else { @@ -545,6 +577,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, unregister_ftrace_function(&ftrace_profile_ops); } } + out: mutex_unlock(&ftrace_profile_lock); filp->f_pos += cnt; @@ -552,6 +585,17 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, return cnt; } +static ssize_t +ftrace_profile_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int r; + + r = sprintf(buf, "%u\n", ftrace_profile_enabled); + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + static const struct file_operations ftrace_profile_fops = { .open = tracing_open_generic, .read = ftrace_profile_read, @@ -577,39 +621,80 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer) "'function_profile_enabled' entry\n"); } -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ - unsigned long key; - - if (!ftrace_profile_hash) - return; - - key = hash_long(rec->ip, ftrace_profile_bits); - hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); -} - -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ - mutex_lock(&ftrace_profile_lock); - hlist_del(&rec->node); - mutex_unlock(&ftrace_profile_lock); -} - #else /* CONFIG_FUNCTION_PROFILER */ -static void ftrace_profile_init(int nr_funcs) -{ -} -static void ftrace_add_profile(struct dyn_ftrace *rec) -{ -} static void ftrace_profile_debugfs(struct dentry *d_tracer) { } -static void ftrace_profile_release(struct dyn_ftrace *rec) -{ -} #endif /* CONFIG_FUNCTION_PROFILER */ +/* set when tracing only a pid */ +struct pid *ftrace_pid_trace; +static struct pid * const ftrace_swapper_pid = &init_struct_pid; + +#ifdef CONFIG_DYNAMIC_FTRACE + +#ifndef CONFIG_FTRACE_MCOUNT_RECORD +# error Dynamic ftrace depends on MCOUNT_RECORD +#endif + +static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; + +struct ftrace_func_probe { + struct hlist_node node; + struct ftrace_probe_ops *ops; + unsigned long flags; + unsigned long ip; + void *data; + struct rcu_head rcu; +}; + +enum { + FTRACE_ENABLE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_ENABLE_MCOUNT = (1 << 3), + FTRACE_DISABLE_MCOUNT = (1 << 4), + FTRACE_START_FUNC_RET = (1 << 5), + FTRACE_STOP_FUNC_RET = (1 << 6), +}; + +static int ftrace_filtered; + +static struct dyn_ftrace *ftrace_new_addrs; + +static DEFINE_MUTEX(ftrace_regex_lock); + +struct ftrace_page { + struct ftrace_page *next; + int index; + struct dyn_ftrace records[]; +}; + +#define ENTRIES_PER_PAGE \ + ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) + +/* estimate from running different kernels */ +#define NR_TO_INIT 10000 + +static struct ftrace_page *ftrace_pages_start; +static struct ftrace_page *ftrace_pages; + +static struct dyn_ftrace *ftrace_free_records; + +/* + * This is a double for. Do not use 'break' to break out of the loop, + * you must use a goto. + */ +#define do_for_each_ftrace_rec(pg, rec) \ + for (pg = ftrace_pages_start; pg; pg = pg->next) { \ + int _____i; \ + for (_____i = 0; _____i < pg->index; _____i++) { \ + rec = &pg->records[_____i]; + +#define while_for_each_ftrace_rec() \ + } \ + } + #ifdef CONFIG_KPROBES static int frozen_record_count; @@ -660,10 +745,8 @@ void ftrace_release(void *start, unsigned long size) mutex_lock(&ftrace_lock); do_for_each_ftrace_rec(pg, rec) { if ((rec->ip >= s) && (rec->ip < e) && - !(rec->flags & FTRACE_FL_FREE)) { + !(rec->flags & FTRACE_FL_FREE)) ftrace_free_rec(rec); - ftrace_profile_release(rec); - } } while_for_each_ftrace_rec(); mutex_unlock(&ftrace_lock); } @@ -717,8 +800,6 @@ ftrace_record_ip(unsigned long ip) rec->newlist = ftrace_new_addrs; ftrace_new_addrs = rec; - ftrace_add_profile(rec); - return rec; } @@ -2462,8 +2543,6 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - ftrace_profile_debugfs(d_tracer); - return 0; } @@ -2532,8 +2611,6 @@ void __init ftrace_init(void) if (ret) goto failed; - ftrace_profile_init(count); - last_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_convert_nops(NULL, @@ -2734,6 +2811,9 @@ static __init int ftrace_init_debugfs(void) if (!entry) pr_warning("Could not create debugfs " "'set_ftrace_pid' entry\n"); + + ftrace_profile_debugfs(d_tracer); + return 0; } fs_initcall(ftrace_init_debugfs); -- cgit v0.10.2 From 0706f1c48ca8a7ab478090b4e38f2e578ae2bfe0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 23 Mar 2009 23:12:58 -0400 Subject: tracing: adding function timings to function profiler If the function graph trace is enabled, the function profiler will use it to take the timing of the functions. cat /debug/tracing/trace_stat/functions Function Hit Time -------- --- ---- mwait_idle 127 183028.4 us schedule 26 151997.7 us __schedule 31 151975.1 us sys_wait4 2 74080.53 us do_wait 2 74077.80 us sys_newlstat 138 39929.16 us do_path_lookup 179 39845.79 us vfs_lstat_fd 138 39761.97 us user_path_at 153 39469.58 us path_walk 179 39435.76 us __link_path_walk 189 39143.73 us [...] Note the times are skewed due to the function graph tracer not taking into account schedules. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 24dac44..a9ccd71 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -33,7 +33,7 @@ #include -#include "trace.h" +#include "trace_output.h" #include "trace_stat.h" #define FTRACE_WARN_ON(cond) \ @@ -246,6 +246,9 @@ struct ftrace_profile { struct hlist_node node; unsigned long ip; unsigned long counter; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + unsigned long long time; +#endif }; struct ftrace_profile_page { @@ -303,6 +306,22 @@ static void *function_stat_start(struct tracer_stat *trace) return function_stat_next(&profile_pages_start->records[0], 0); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* function graph compares on total time */ +static int function_stat_cmp(void *p1, void *p2) +{ + struct ftrace_profile *a = p1; + struct ftrace_profile *b = p2; + + if (a->time < b->time) + return -1; + if (a->time > b->time) + return 1; + else + return 0; +} +#else +/* not function graph compares against hits */ static int function_stat_cmp(void *p1, void *p2) { struct ftrace_profile *a = p1; @@ -315,11 +334,17 @@ static int function_stat_cmp(void *p1, void *p2) else return 0; } +#endif static int function_stat_headers(struct seq_file *m) { +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + seq_printf(m, " Function Hit Time\n" + " -------- --- ----\n"); +#else seq_printf(m, " Function Hit\n" " -------- ---\n"); +#endif return 0; } @@ -327,10 +352,25 @@ static int function_stat_show(struct seq_file *m, void *v) { struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + static struct trace_seq s; + static DEFINE_MUTEX(mutex); + + mutex_lock(&mutex); + trace_seq_init(&s); + trace_print_graph_duration(rec->time, &s); +#endif kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + seq_printf(m, " %-30.30s %10lu", str, rec->counter); + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + seq_printf(m, " "); + trace_print_seq(m, &s); + mutex_unlock(&mutex); +#endif + seq_putc(m, '\n'); - seq_printf(m, " %-30.30s %10lu\n", str, rec->counter); return 0; } @@ -534,11 +574,52 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int profile_graph_entry(struct ftrace_graph_ent *trace) +{ + function_profile_call(trace->func, 0); + return 1; +} + +static void profile_graph_return(struct ftrace_graph_ret *trace) +{ + unsigned long flags; + struct ftrace_profile *rec; + + local_irq_save(flags); + rec = ftrace_find_profiled_func(trace->func); + if (rec) + rec->time += trace->rettime - trace->calltime; + local_irq_restore(flags); +} + +static int register_ftrace_profiler(void) +{ + return register_ftrace_graph(&profile_graph_return, + &profile_graph_entry); +} + +static void unregister_ftrace_profiler(void) +{ + unregister_ftrace_graph(); +} +#else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, }; +static int register_ftrace_profiler(void) +{ + return register_ftrace_function(&ftrace_profile_ops); +} + +static void unregister_ftrace_profiler(void) +{ + unregister_ftrace_function(&ftrace_profile_ops); +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static ssize_t ftrace_profile_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) @@ -570,11 +651,15 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, goto out; } - register_ftrace_function(&ftrace_profile_ops); + ret = register_ftrace_profiler(); + if (ret < 0) { + cnt = ret; + goto out; + } ftrace_profile_enabled = 1; } else { ftrace_profile_enabled = 0; - unregister_ftrace_function(&ftrace_profile_ops); + unregister_ftrace_profiler(); } } out: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 67c6a21..821bf49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -402,17 +402,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) return cnt; } -static void -trace_print_seq(struct seq_file *m, struct trace_seq *s) -{ - int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; - - s->buffer[len] = 0; - seq_puts(m, s->buffer); - - trace_seq_init(s); -} - /** * update_max_tr - snapshot all trace buffers from global_trace to max_tr * @tr: tracer diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d7410bb..c66ca3b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -605,6 +605,8 @@ extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern enum print_line_t print_graph_function(struct trace_iterator *iter); +extern enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); #ifdef CONFIG_DYNAMIC_FTRACE /* TODO: make this variable */ @@ -636,7 +638,6 @@ static inline int ftrace_graph_addr(unsigned long addr) return 1; } #endif /* CONFIG_DYNAMIC_FTRACE */ - #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function(struct trace_iterator *iter) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index d28687e..85bba0f 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -426,8 +426,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr, return TRACE_TYPE_HANDLED; } -static enum print_line_t -print_graph_duration(unsigned long long duration, struct trace_seq *s) +enum print_line_t +trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) { unsigned long nsecs_rem = do_div(duration, 1000); /* log10(ULONG_MAX) + '\0' */ @@ -464,12 +464,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s) if (!ret) return TRACE_TYPE_PARTIAL_LINE; } + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_duration(unsigned long long duration, struct trace_seq *s) +{ + int ret; + + ret = trace_print_graph_duration(duration, s); + if (ret != TRACE_TYPE_HANDLED) + return ret; ret = trace_seq_printf(s, "| "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - return TRACE_TYPE_HANDLED; + return TRACE_TYPE_HANDLED; } /* Case of a leaf function on its call entry */ diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 19261fd..a3b6e3f 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -19,6 +19,16 @@ static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; static int next_event_type = __TRACE_LAST_TYPE + 1; +void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ + int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; + + s->buffer[len] = 0; + seq_puts(m, s->buffer); + + trace_seq_init(s); +} + enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 35c422f..1eac297 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -20,6 +20,8 @@ trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter); +extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); + extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); extern int -- cgit v0.10.2 From cafb168a1c92e4c9e1731fe3d666c39611762c49 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 20:50:39 -0400 Subject: tracing: make the function profiler per cpu Impact: speed enhancement By making the function profiler record in per cpu data we not only get better readings, avoid races, we also do not have to take any locks. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a9ccd71..ed1fc50 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -257,28 +257,28 @@ struct ftrace_profile_page { struct ftrace_profile records[]; }; +struct ftrace_profile_stat { + atomic_t disabled; + struct hlist_head *hash; + struct ftrace_profile_page *pages; + struct ftrace_profile_page *start; + struct tracer_stat stat; +}; + #define PROFILE_RECORDS_SIZE \ (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) #define PROFILES_PER_PAGE \ (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -/* TODO: make these percpu, to prevent cache line bouncing */ -static struct ftrace_profile_page *profile_pages_start; -static struct ftrace_profile_page *profile_pages; - -static struct hlist_head *ftrace_profile_hash; static int ftrace_profile_bits; static int ftrace_profile_enabled; static DEFINE_MUTEX(ftrace_profile_lock); -static DEFINE_PER_CPU(atomic_t, ftrace_profile_disable); +static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); #define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ -static raw_spinlock_t ftrace_profile_rec_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; - static void * function_stat_next(void *v, int idx) { @@ -303,7 +303,13 @@ function_stat_next(void *v, int idx) static void *function_stat_start(struct tracer_stat *trace) { - return function_stat_next(&profile_pages_start->records[0], 0); + struct ftrace_profile_stat *stat = + container_of(trace, struct ftrace_profile_stat, stat); + + if (!stat || !stat->start) + return NULL; + + return function_stat_next(&stat->start->records[0], 0); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -374,20 +380,11 @@ static int function_stat_show(struct seq_file *m, void *v) return 0; } -static struct tracer_stat function_stats = { - .name = "functions", - .stat_start = function_stat_start, - .stat_next = function_stat_next, - .stat_cmp = function_stat_cmp, - .stat_headers = function_stat_headers, - .stat_show = function_stat_show -}; - -static void ftrace_profile_reset(void) +static void ftrace_profile_reset(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; - pg = profile_pages = profile_pages_start; + pg = stat->pages = stat->start; while (pg) { memset(pg->records, 0, PROFILE_RECORDS_SIZE); @@ -395,24 +392,24 @@ static void ftrace_profile_reset(void) pg = pg->next; } - memset(ftrace_profile_hash, 0, + memset(stat->hash, 0, FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); } -int ftrace_profile_pages_init(void) +int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; int i; /* If we already allocated, do nothing */ - if (profile_pages) + if (stat->pages) return 0; - profile_pages = (void *)get_zeroed_page(GFP_KERNEL); - if (!profile_pages) + stat->pages = (void *)get_zeroed_page(GFP_KERNEL); + if (!stat->pages) return -ENOMEM; - pg = profile_pages_start = profile_pages; + pg = stat->start = stat->pages; /* allocate 10 more pages to start */ for (i = 0; i < 10; i++) { @@ -430,13 +427,16 @@ int ftrace_profile_pages_init(void) return 0; } -static int ftrace_profile_init(void) +static int ftrace_profile_init_cpu(int cpu) { + struct ftrace_profile_stat *stat; int size; - if (ftrace_profile_hash) { + stat = &per_cpu(ftrace_profile_stats, cpu); + + if (stat->hash) { /* If the profile is already created, simply reset it */ - ftrace_profile_reset(); + ftrace_profile_reset(stat); return 0; } @@ -446,29 +446,45 @@ static int ftrace_profile_init(void) */ size = FTRACE_PROFILE_HASH_SIZE; - ftrace_profile_hash = - kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); - if (!ftrace_profile_hash) + if (!stat->hash) return -ENOMEM; - size--; + if (!ftrace_profile_bits) { + size--; - for (; size; size >>= 1) - ftrace_profile_bits++; + for (; size; size >>= 1) + ftrace_profile_bits++; + } /* Preallocate a few pages */ - if (ftrace_profile_pages_init() < 0) { - kfree(ftrace_profile_hash); - ftrace_profile_hash = NULL; + if (ftrace_profile_pages_init(stat) < 0) { + kfree(stat->hash); + stat->hash = NULL; return -ENOMEM; } return 0; } +static int ftrace_profile_init(void) +{ + int cpu; + int ret = 0; + + for_each_online_cpu(cpu) { + ret = ftrace_profile_init_cpu(cpu); + if (ret) + break; + } + + return ret; +} + /* interrupts must be disabled */ -static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) +static struct ftrace_profile * +ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) { struct ftrace_profile *rec; struct hlist_head *hhd; @@ -476,7 +492,7 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) unsigned long key; key = hash_long(ip, ftrace_profile_bits); - hhd = &ftrace_profile_hash[key]; + hhd = &stat->hash[key]; if (hlist_empty(hhd)) return NULL; @@ -489,52 +505,50 @@ static struct ftrace_profile *ftrace_find_profiled_func(unsigned long ip) return NULL; } -static void ftrace_add_profile(struct ftrace_profile *rec) +static void ftrace_add_profile(struct ftrace_profile_stat *stat, + struct ftrace_profile *rec) { unsigned long key; key = hash_long(rec->ip, ftrace_profile_bits); - hlist_add_head_rcu(&rec->node, &ftrace_profile_hash[key]); + hlist_add_head_rcu(&rec->node, &stat->hash[key]); } /* Interrupts must be disabled calling this */ static struct ftrace_profile * -ftrace_profile_alloc(unsigned long ip, bool alloc_safe) +ftrace_profile_alloc(struct ftrace_profile_stat *stat, + unsigned long ip, bool alloc_safe) { struct ftrace_profile *rec = NULL; /* prevent recursion */ - if (atomic_inc_return(&__get_cpu_var(ftrace_profile_disable)) != 1) + if (atomic_inc_return(&stat->disabled) != 1) goto out; - __raw_spin_lock(&ftrace_profile_rec_lock); - /* Try to always keep another page available */ - if (!profile_pages->next && alloc_safe) - profile_pages->next = (void *)get_zeroed_page(GFP_ATOMIC); + if (!stat->pages->next && alloc_safe) + stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC); /* * Try to find the function again since another * task on another CPU could have added it */ - rec = ftrace_find_profiled_func(ip); + rec = ftrace_find_profiled_func(stat, ip); if (rec) - goto out_unlock; + goto out; - if (profile_pages->index == PROFILES_PER_PAGE) { - if (!profile_pages->next) - goto out_unlock; - profile_pages = profile_pages->next; + if (stat->pages->index == PROFILES_PER_PAGE) { + if (!stat->pages->next) + goto out; + stat->pages = stat->pages->next; } - rec = &profile_pages->records[profile_pages->index++]; + rec = &stat->pages->records[stat->pages->index++]; rec->ip = ip; - ftrace_add_profile(rec); + ftrace_add_profile(stat, rec); - out_unlock: - __raw_spin_unlock(&ftrace_profile_rec_lock); out: - atomic_dec(&__get_cpu_var(ftrace_profile_disable)); + atomic_dec(&stat->disabled); return rec; } @@ -552,6 +566,7 @@ static bool ftrace_safe_to_allocate(void) static void function_profile_call(unsigned long ip, unsigned long parent_ip) { + struct ftrace_profile_stat *stat; struct ftrace_profile *rec; unsigned long flags; bool alloc_safe; @@ -562,9 +577,14 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) alloc_safe = ftrace_safe_to_allocate(); local_irq_save(flags); - rec = ftrace_find_profiled_func(ip); + + stat = &__get_cpu_var(ftrace_profile_stats); + if (!stat->hash) + goto out; + + rec = ftrace_find_profiled_func(stat, ip); if (!rec) { - rec = ftrace_profile_alloc(ip, alloc_safe); + rec = ftrace_profile_alloc(stat, ip, alloc_safe); if (!rec) goto out; } @@ -583,13 +603,19 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) static void profile_graph_return(struct ftrace_graph_ret *trace) { - unsigned long flags; + struct ftrace_profile_stat *stat; struct ftrace_profile *rec; + unsigned long flags; local_irq_save(flags); - rec = ftrace_find_profiled_func(trace->func); + stat = &__get_cpu_var(ftrace_profile_stats); + if (!stat->hash) + goto out; + + rec = ftrace_find_profiled_func(stat, trace->func); if (rec) rec->time += trace->rettime - trace->calltime; + out: local_irq_restore(flags); } @@ -687,16 +713,51 @@ static const struct file_operations ftrace_profile_fops = { .write = ftrace_profile_write, }; +/* used to initialize the real stat files */ +static struct tracer_stat function_stats __initdata = { + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show +}; + static void ftrace_profile_debugfs(struct dentry *d_tracer) { + struct ftrace_profile_stat *stat; struct dentry *entry; + char *name; int ret; + int cpu; - ret = register_stat_tracer(&function_stats); - if (ret) { - pr_warning("Warning: could not register " - "function stats\n"); - return; + for_each_possible_cpu(cpu) { + stat = &per_cpu(ftrace_profile_stats, cpu); + + /* allocate enough for function name + cpu number */ + name = kmalloc(32, GFP_KERNEL); + if (!name) { + /* + * The files created are permanent, if something happens + * we still do not free memory. + */ + kfree(stat); + WARN(1, + "Could not allocate stat file for cpu %d\n", + cpu); + return; + } + stat->stat = function_stats; + snprintf(name, 32, "function%d", cpu); + stat->stat.name = name; + ret = register_stat_tracer(&stat->stat); + if (ret) { + WARN(1, + "Could not register function stat for cpu %d\n", + cpu); + kfree(name); + return; + } } entry = debugfs_create_file("function_profile_enabled", 0644, -- cgit v0.10.2 From a2a16d6a3156ef7309ca7328a20c35df9418e670 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 24 Mar 2009 23:17:58 -0400 Subject: function-graph: add option to calculate graph time or not graph time is the time that a function is executing another function. Thus if function A calls B, if graph-time is set, then the time for A includes B. This is the default behavior. But if graph-time is off, then the time spent executing B is subtracted from A. Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 015a3d2..9e0a8d2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -365,6 +365,7 @@ struct ftrace_ret_stack { unsigned long ret; unsigned long func; unsigned long long calltime; + unsigned long long subtime; }; /* @@ -376,8 +377,6 @@ extern void return_to_handler(void); extern int ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); -extern void -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret); /* * Sometimes we don't want to trace a function with the function diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ed1fc50..71e5fae 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -604,6 +604,7 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) static void profile_graph_return(struct ftrace_graph_ret *trace) { struct ftrace_profile_stat *stat; + unsigned long long calltime; struct ftrace_profile *rec; unsigned long flags; @@ -612,9 +613,27 @@ static void profile_graph_return(struct ftrace_graph_ret *trace) if (!stat->hash) goto out; + calltime = trace->rettime - trace->calltime; + + if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { + int index; + + index = trace->depth; + + /* Append this call time to the parent time to subtract */ + if (index) + current->ret_stack[index - 1].subtime += calltime; + + if (current->ret_stack[index].subtime < calltime) + calltime -= current->ret_stack[index].subtime; + else + calltime = 0; + } + rec = ftrace_find_profiled_func(stat, trace->func); if (rec) - rec->time += trace->rettime - trace->calltime; + rec->time += calltime; + out: local_irq_restore(flags); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 821bf49..5d1a16c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -255,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); /* trace_flags holds trace_options default values */ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | - TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME; + TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | + TRACE_ITER_GRAPH_TIME; /** * trace_wake_up - wake up tasks waiting for trace input @@ -317,6 +318,7 @@ static const char *trace_options[] = { "latency-format", "global-clock", "sleep-time", + "graph-time", NULL }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index c66ca3b..e3429a8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -685,6 +685,7 @@ enum trace_iterator_flags { TRACE_ITER_LATENCY_FMT = 0x40000, TRACE_ITER_GLOBAL_CLK = 0x80000, TRACE_ITER_SLEEP_TIME = 0x100000, + TRACE_ITER_GRAPH_TIME = 0x200000, }; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 85bba0f..10f6ad7 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -78,13 +78,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = calltime; + current->ret_stack[index].subtime = 0; *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -void +static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; @@ -104,9 +105,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; - barrier(); - current->curr_ret_stack--; - } /* @@ -121,6 +119,8 @@ unsigned long ftrace_return_to_handler(void) ftrace_pop_return_trace(&trace, &ret); trace.rettime = trace_clock_local(); ftrace_graph_return(&trace); + barrier(); + current->curr_ret_stack--; if (unlikely(!ret)) { ftrace_graph_stop(); -- cgit v0.10.2 From fb9fb015e92123fa3a8e0c2e2fff491d4a56b470 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Mar 2009 13:26:41 -0400 Subject: tracing: clean up tracing profiler Ingo Molnar suggested clean ups for the profiling code. This patch makes those updates. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 71e5fae..a141d84 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -69,7 +69,7 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops ftrace_list_end __read_mostly = { - .func = ftrace_stub, + .func = ftrace_stub, }; static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; @@ -271,8 +271,10 @@ struct ftrace_profile_stat { #define PROFILES_PER_PAGE \ (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) -static int ftrace_profile_bits; -static int ftrace_profile_enabled; +static int ftrace_profile_bits __read_mostly; +static int ftrace_profile_enabled __read_mostly; + +/* ftrace_profile_lock - synchronize the enable and disable of the profiler */ static DEFINE_MUTEX(ftrace_profile_lock); static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); @@ -651,7 +653,7 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { - .func = function_profile_call, + .func = function_profile_call, }; static int register_ftrace_profiler(void) @@ -670,7 +672,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { unsigned long val; - char buf[64]; + char buf[64]; /* big enough to hold a number */ int ret; if (cnt >= sizeof(buf)) @@ -719,7 +721,7 @@ static ssize_t ftrace_profile_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - char buf[64]; + char buf[64]; /* big enough to hold a number */ int r; r = sprintf(buf, "%u\n", ftrace_profile_enabled); @@ -734,12 +736,12 @@ static const struct file_operations ftrace_profile_fops = { /* used to initialize the real stat files */ static struct tracer_stat function_stats __initdata = { - .name = "functions", - .stat_start = function_stat_start, - .stat_next = function_stat_next, - .stat_cmp = function_stat_cmp, - .stat_headers = function_stat_headers, - .stat_show = function_stat_show + .name = "functions", + .stat_start = function_stat_start, + .stat_next = function_stat_next, + .stat_cmp = function_stat_cmp, + .stat_headers = function_stat_headers, + .stat_show = function_stat_show }; static void ftrace_profile_debugfs(struct dentry *d_tracer) @@ -1954,7 +1956,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_probe_ops __read_mostly = { - .func = function_trace_probe_call, + .func = function_trace_probe_call, }; static int ftrace_probe_registered; -- cgit v0.10.2 From 318e0a73c9e41b9a17241829bcd0605a39b87cb9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Mar 2009 20:06:34 -0400 Subject: tracing: remove on the fly allocator from function profiler Impact: safer code The on the fly allocator for the function profiler was to save memory. But at the expense of stability. Although it survived several tests, allocating from the function tracer is just too risky, just to save space. This patch removes the allocator and simply allocates enough entries at start up. Each function gets a profiling structure of 40 bytes. With an average of 20K functions, and this is for each CPU, we have 800K per online CPU. This is not too bad, at least for non-embedded. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a141d84..4d90c91 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -401,6 +401,8 @@ static void ftrace_profile_reset(struct ftrace_profile_stat *stat) int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; + int functions; + int pages; int i; /* If we already allocated, do nothing */ @@ -411,22 +413,46 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) if (!stat->pages) return -ENOMEM; +#ifdef CONFIG_DYNAMIC_FTRACE + functions = ftrace_update_tot_cnt; +#else + /* + * We do not know the number of functions that exist because + * dynamic tracing is what counts them. With past experience + * we have around 20K functions. That should be more than enough. + * It is highly unlikely we will execute every function in + * the kernel. + */ + functions = 20000; +#endif + pg = stat->start = stat->pages; - /* allocate 10 more pages to start */ - for (i = 0; i < 10; i++) { + pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); + + for (i = 0; i < pages; i++) { pg->next = (void *)get_zeroed_page(GFP_KERNEL); - /* - * We only care about allocating profile_pages, if - * we failed to allocate here, hopefully we will allocate - * later. - */ if (!pg->next) - break; + goto out_free; pg = pg->next; } return 0; + + out_free: + pg = stat->start; + while (pg) { + unsigned long tmp = (unsigned long)pg; + + pg = pg->next; + free_page(tmp); + } + + free_page((unsigned long)stat->pages); + stat->pages = NULL; + stat->start = NULL; + + return -ENOMEM; } static int ftrace_profile_init_cpu(int cpu) @@ -460,7 +486,7 @@ static int ftrace_profile_init_cpu(int cpu) ftrace_profile_bits++; } - /* Preallocate a few pages */ + /* Preallocate the function profiling pages */ if (ftrace_profile_pages_init(stat) < 0) { kfree(stat->hash); stat->hash = NULL; @@ -516,24 +542,21 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat, hlist_add_head_rcu(&rec->node, &stat->hash[key]); } -/* Interrupts must be disabled calling this */ +/* + * The memory is already allocated, this simply finds a new record to use. + */ static struct ftrace_profile * -ftrace_profile_alloc(struct ftrace_profile_stat *stat, - unsigned long ip, bool alloc_safe) +ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) { struct ftrace_profile *rec = NULL; - /* prevent recursion */ + /* prevent recursion (from NMIs) */ if (atomic_inc_return(&stat->disabled) != 1) goto out; - /* Try to always keep another page available */ - if (!stat->pages->next && alloc_safe) - stat->pages->next = (void *)get_zeroed_page(GFP_ATOMIC); - /* - * Try to find the function again since another - * task on another CPU could have added it + * Try to find the function again since an NMI + * could have added it */ rec = ftrace_find_profiled_func(stat, ip); if (rec) @@ -555,29 +578,16 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, return rec; } -/* - * If we are not in an interrupt, or softirq and - * and interrupts are disabled and preemption is not enabled - * (not in a spinlock) then it should be safe to allocate memory. - */ -static bool ftrace_safe_to_allocate(void) -{ - return !in_interrupt() && irqs_disabled() && !preempt_count(); -} - static void function_profile_call(unsigned long ip, unsigned long parent_ip) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; unsigned long flags; - bool alloc_safe; if (!ftrace_profile_enabled) return; - alloc_safe = ftrace_safe_to_allocate(); - local_irq_save(flags); stat = &__get_cpu_var(ftrace_profile_stats); @@ -586,7 +596,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) rec = ftrace_find_profiled_func(stat, ip); if (!rec) { - rec = ftrace_profile_alloc(stat, ip, alloc_safe); + rec = ftrace_profile_alloc(stat, ip); if (!rec) goto out; } -- cgit v0.10.2 From 34886c8bc590f078d4c0b88f50d061326639198d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Mar 2009 21:00:47 -0400 Subject: tracing: add average time in function to function profiler Show the average time in the function (Time / Hit) Function Hit Time Avg -------- --- ---- --- mwait_idle 51 140326.6 us 2751.503 us smp_apic_timer_interrupt 47 3517.735 us 74.845 us schedule 10 2738.754 us 273.875 us __schedule 10 2732.857 us 273.285 us hrtimer_interrupt 47 1896.104 us 40.342 us irq_exit 56 1711.833 us 30.568 us __run_hrtimer 47 1315.589 us 27.991 us tick_sched_timer 47 1138.690 us 24.227 us do_softirq 56 1116.829 us 19.943 us __do_softirq 56 1066.932 us 19.052 us do_IRQ 9 926.153 us 102.905 us Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4d90c91..c7f4a4b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -347,8 +347,10 @@ static int function_stat_cmp(void *p1, void *p2) static int function_stat_headers(struct seq_file *m) { #ifdef CONFIG_FUNCTION_GRAPH_TRACER - seq_printf(m, " Function Hit Time\n" - " -------- --- ----\n"); + seq_printf(m, " Function " + "Hit Time Avg\n" + " -------- " + "--- ---- ---\n"); #else seq_printf(m, " Function Hit\n" " -------- ---\n"); @@ -361,12 +363,9 @@ static int function_stat_show(struct seq_file *m, void *v) struct ftrace_profile *rec = v; char str[KSYM_SYMBOL_LEN]; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - static struct trace_seq s; static DEFINE_MUTEX(mutex); - - mutex_lock(&mutex); - trace_seq_init(&s); - trace_print_graph_duration(rec->time, &s); + static struct trace_seq s; + unsigned long long avg; #endif kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); @@ -374,6 +373,14 @@ static int function_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_printf(m, " "); + avg = rec->time; + do_div(avg, rec->counter); + + mutex_lock(&mutex); + trace_seq_init(&s); + trace_print_graph_duration(rec->time, &s); + trace_seq_puts(&s, " "); + trace_print_graph_duration(avg, &s); trace_print_seq(m, &s); mutex_unlock(&mutex); #endif -- cgit v0.10.2 From a8a93f3f03b7a8008d720e8d91798efe599d416c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 13:45:34 -0800 Subject: mm: disable preemption in apply_to_pte_range Impact: bugfix Lazy mmu mode needs preemption disabled, so if we're apply to init_mm (which doesn't require any pte locks), then explicitly disable preemption. (Do it unconditionally after checking we've successfully done the allocation to simplify the error handling.) Signed-off-by: Jeremy Fitzhardinge diff --git a/mm/memory.c b/mm/memory.c index baa999e..b80cc31 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1718,6 +1718,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, BUG_ON(pmd_huge(*pmd)); + preempt_disable(); arch_enter_lazy_mmu_mode(); token = pmd_pgtable(*pmd); @@ -1729,6 +1730,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); + preempt_enable(); if (mm != &init_mm) pte_unmap_unlock(pte-1, ptl); -- cgit v0.10.2 From b8bcfe997e46150fedcc3f5b26b846400122fdd9 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:05:19 -0800 Subject: x86/paravirt: remove lazy mode in interrupts Impact: simplification, robustness Make paravirt_lazy_mode() always return PARAVIRT_LAZY_NONE when in an interrupt. This prevents interrupt code from accidentally inheriting an outer lazy state, and instead does everything synchronously. Outer batched operations are left deferred. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra Cc: Thomas Gleixner diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 63dd358..8ab250a 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -282,6 +282,9 @@ void paravirt_leave_lazy_cpu(void) enum paravirt_lazy_mode paravirt_get_lazy_mode(void) { + if (in_interrupt()) + return PARAVIRT_LAZY_NONE; + return __get_cpu_var(paravirt_lazy_mode); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b727..cfbb4a7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -225,12 +225,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) if (!pmd_present(*pmd_k)) return NULL; - if (!pmd_present(*pmd)) { + if (!pmd_present(*pmd)) set_pmd(pmd, *pmd_k); - arch_flush_lazy_mmu_mode(); - } else { + else BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); - } return pmd_k; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 00f127c..e81dfa4 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -87,7 +87,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); set_pte(kmap_pte-idx, mk_pte(page, prot)); - arch_flush_lazy_mmu_mode(); return (void *)vaddr; } @@ -117,7 +116,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) #endif } - arch_flush_lazy_mmu_mode(); pagefault_enable(); } diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 04102d4..b6a61f3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -74,7 +74,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) kpte_clear_flush(kmap_pte-idx, vaddr); - arch_flush_lazy_mmu_mode(); pagefault_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 9c42949..9015e5e 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -824,13 +824,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, vm_unmap_aliases(); - /* - * If we're called with lazy mmu updates enabled, the - * in-memory pte state may be stale. Flush pending updates to - * bring them up to date. - */ - arch_flush_lazy_mmu_mode(); - cpa.vaddr = addr; cpa.numpages = numpages; cpa.mask_set = mask_set; @@ -873,13 +866,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, } else cpa_flush_all(cache); - /* - * If we've been called with lazy mmu updates enabled, then - * make sure that everything gets flushed out before we - * return. - */ - arch_flush_lazy_mmu_mode(); - out: return ret; } -- cgit v0.10.2 From 7fd7d83d49914f03aefffba6aee09032fcd54cce Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:24:03 -0800 Subject: x86/pvops: replace arch_enter_lazy_cpu_mode with arch_start_context_switch Impact: simplification, prepare for later changes Make lazy cpu mode more specific to context switching, so that it makes sense to do more context-switch specific things in the callbacks. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0617d5c..7b28aba 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1420,19 +1420,17 @@ void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); void paravirt_leave_lazy(enum paravirt_lazy_mode mode); -#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE -static inline void arch_enter_lazy_cpu_mode(void) +#define __HAVE_ARCH_START_CONTEXT_SWITCH +static inline void arch_start_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); } -static inline void arch_leave_lazy_cpu_mode(void) +static inline void arch_end_context_switch(void) { PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); } -void arch_flush_lazy_cpu_mode(void); - #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE static inline void arch_enter_lazy_mmu_mode(void) { diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 8ab250a..5eea954 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -301,19 +301,6 @@ void arch_flush_lazy_mmu_mode(void) preempt_enable(); } -void arch_flush_lazy_cpu_mode(void) -{ - preempt_disable(); - - if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { - WARN_ON(preempt_count() == 1); - arch_leave_lazy_cpu_mode(); - arch_enter_lazy_cpu_mode(); - } - - preempt_enable(); -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 14014d7..57e49a8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(); /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index abb7e6a..7115e60 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_leave_lazy_cpu_mode(); + arch_end_context_switch(); /* * Switch FS and GS. diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index cb6afa4..6b98f87 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1119,10 +1119,8 @@ static void drop_other_mm_ref(void *info) /* If this cpu still has a stale cr3 reference, then make sure it has been flushed. */ - if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { + if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) load_cr3(swapper_pg_dir); - arch_flush_lazy_cpu_mode(); - } } static void xen_drop_mm_ref(struct mm_struct *mm) @@ -1135,7 +1133,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm) load_cr3(swapper_pg_dir); else leave_mm(smp_processor_id()); - arch_flush_lazy_cpu_mode(); } /* Get the "official" set of cpus referring to our pagetable. */ diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index e16fdb1..235e34a 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -73,8 +73,8 @@ static inline int pte_file(pte_t pte) { return 0; } #define pgtable_cache_init() do {} while (0) #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) + +#define arch_start_context_switch() do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 8e6d0ca..922f036 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #endif /* - * A facility to provide batching of the reload of page tables with the - * actual context switch code for paravirtualized guests. By convention, - * only one of the lazy modes (CPU, MMU) should be active at any given - * time, entry should never be nested, and entry and exits should always - * be paired. This is for sanity of maintaining and reasoning about the - * kernel code. + * A facility to provide batching of the reload of page tables and + * other process state with the actual context switch code for + * paravirtualized guests. By convention, only one of the batched + * update (lazy) modes (CPU, MMU) should be active at any given time, + * entry should never be nested, and entry and exits should always be + * paired. This is for sanity of maintaining and reasoning about the + * kernel code. In this case, the exit (end of the context switch) is + * in architecture-specific code, and so doesn't need a generic + * definition. */ -#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE -#define arch_enter_lazy_cpu_mode() do {} while (0) -#define arch_leave_lazy_cpu_mode() do {} while (0) -#define arch_flush_lazy_cpu_mode() do {} while (0) +#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH +#define arch_start_context_switch() do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/kernel/sched.c b/kernel/sched.c index 5757e03..7530fdd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * combine the page table reload and the switch backend into * one hypercall. */ - arch_enter_lazy_cpu_mode(); + arch_start_context_switch(); if (unlikely(!mm)) { next->active_mm = oldmm; -- cgit v0.10.2 From b407fc57b815b2016186220baabc76cc8264206e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:46:21 -0800 Subject: x86/paravirt: flush pending mmu updates on context switch Impact: allow preemption during lazy mmu updates If we're in lazy mmu mode when context switching, leave lazy mmu mode, but remember the task's state in TIF_LAZY_MMU_UPDATES. When we resume the task, check this flag and re-enter lazy mmu mode if its set. This sets things up for allowing lazy mmu mode while preemptible, though that won't actually be active until the next change. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 7b28aba..58d2481 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -1418,7 +1418,6 @@ void paravirt_enter_lazy_cpu(void); void paravirt_leave_lazy_cpu(void); void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); -void paravirt_leave_lazy(enum paravirt_lazy_mode mode); #define __HAVE_ARCH_START_CONTEXT_SWITCH static inline void arch_start_context_switch(void) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index df9d5f7..2f34d64 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -94,6 +94,7 @@ struct thread_info { #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ +#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -115,6 +116,7 @@ struct thread_info { #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) +#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 478bca9..5d7f6e7 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -201,7 +201,7 @@ static void kvm_leave_lazy_mmu(void) struct kvm_para_state *state = kvm_para_state(); mmu_queue_flush(state); - paravirt_leave_lazy(paravirt_get_lazy_mode()); + paravirt_leave_lazy_mmu(); state->mode = paravirt_get_lazy_mode(); } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5eea954..430a0e3 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -252,7 +252,7 @@ static inline void enter_lazy(enum paravirt_lazy_mode mode) __get_cpu_var(paravirt_lazy_mode) = mode; } -void paravirt_leave_lazy(enum paravirt_lazy_mode mode) +static void leave_lazy(enum paravirt_lazy_mode mode) { BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); BUG_ON(preemptible()); @@ -267,17 +267,24 @@ void paravirt_enter_lazy_mmu(void) void paravirt_leave_lazy_mmu(void) { - paravirt_leave_lazy(PARAVIRT_LAZY_MMU); + leave_lazy(PARAVIRT_LAZY_MMU); } void paravirt_enter_lazy_cpu(void) { + if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { + arch_leave_lazy_mmu_mode(); + set_thread_flag(TIF_LAZY_MMU_UPDATES); + } enter_lazy(PARAVIRT_LAZY_CPU); } void paravirt_leave_lazy_cpu(void) { - paravirt_leave_lazy(PARAVIRT_LAZY_CPU); + leave_lazy(PARAVIRT_LAZY_CPU); + + if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES)) + arch_enter_lazy_mmu_mode(); } enum paravirt_lazy_mode paravirt_get_lazy_mode(void) diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 2cc4a90..950929c 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -473,16 +473,22 @@ static void vmi_enter_lazy_cpu(void) vmi_ops.set_lazy_mode(2); } +static void vmi_leave_lazy_cpu(void) +{ + vmi_ops.set_lazy_mode(0); + paravirt_leave_lazy_cpu(); +} + static void vmi_enter_lazy_mmu(void) { paravirt_enter_lazy_mmu(); vmi_ops.set_lazy_mode(1); } -static void vmi_leave_lazy(void) +static void vmi_leave_lazy_mmu(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); vmi_ops.set_lazy_mode(0); + paravirt_leave_lazy_mmu(); } static inline int __init check_vmi_rom(struct vrom_header *rom) @@ -718,12 +724,12 @@ static inline int __init activate_vmi(void) para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, set_lazy_mode, SetLazyMode); - para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, + para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu, set_lazy_mode, SetLazyMode); para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, set_lazy_mode, SetLazyMode); - para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, + para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, set_lazy_mode, SetLazyMode); /* user and kernel flush are just handled with different flags to FlushTLB */ diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9fe4dda..41a5562 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -147,10 +147,16 @@ static void lazy_hcall(unsigned long call, /* When lazy mode is turned off reset the per-cpu lazy mode variable and then * issue the do-nothing hypercall to flush any stored calls. */ -static void lguest_leave_lazy_mode(void) +static void lguest_leave_lazy_mmu_mode(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + paravirt_leave_lazy_mmu(); +} + +static void lguest_leave_lazy_cpu_mode(void) +{ + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + paravirt_leave_lazy_cpu(); } /*G:033 @@ -1026,7 +1032,7 @@ __init void lguest_init(void) pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; pv_cpu_ops.wbinvd = lguest_wbinvd; pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; - pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode; /* pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; @@ -1039,7 +1045,7 @@ __init void lguest_init(void) pv_mmu_ops.read_cr2 = lguest_read_cr2; pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; - pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; + pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; #ifdef CONFIG_X86_LOCAL_APIC /* apic read/write intercepts */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 82cd39a..f586e63 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -void xen_leave_lazy(void) +static void xen_leave_lazy_cpu(void) { - paravirt_leave_lazy(paravirt_get_lazy_mode()); xen_mc_flush(); + paravirt_leave_lazy_cpu(); } static unsigned long xen_store_tr(void) @@ -819,7 +819,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .lazy_mode = { .enter = paravirt_enter_lazy_cpu, - .leave = xen_leave_lazy, + .leave = xen_leave_lazy_cpu, }, }; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6b98f87..f5f8faa 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1816,6 +1816,11 @@ __init void xen_post_allocator_init(void) xen_mark_init_mm_pinned(); } +static void xen_leave_lazy_mmu(void) +{ + xen_mc_flush(); + paravirt_leave_lazy_mmu(); +} const struct pv_mmu_ops xen_mmu_ops __initdata = { .pagetable_setup_start = xen_pagetable_setup_start, @@ -1891,7 +1896,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .lazy_mode = { .enter = paravirt_enter_lazy_mmu, - .leave = xen_leave_lazy, + .leave = xen_leave_lazy_mmu, }, .set_fixmap = xen_set_fixmap, diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2f5ef26..f897cdf 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); void xen_reserve_top(void); -void xen_leave_lazy(void); void xen_post_allocator_init(void); char * __init xen_memory_setup(void); -- cgit v0.10.2 From 224101ed69d3fbb486868e0f6e0f9fa37302efb4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 11:18:57 -0800 Subject: x86/paravirt: finish change from lazy cpu to context switch start/end Impact: fix lazy context switch API Pass the previous and next tasks into the context switch start end calls, so that the called functions can properly access the task state (esp in end_context_switch, in which the next task is not yet completely current). Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 58d2481..dfdee0c 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -56,6 +56,7 @@ struct desc_ptr; struct tss_struct; struct mm_struct; struct desc_struct; +struct task_struct; /* * Wrapper type for pointers to code which uses the non-standard @@ -203,7 +204,8 @@ struct pv_cpu_ops { void (*swapgs)(void); - struct pv_lazy_ops lazy_mode; + void (*start_context_switch)(struct task_struct *prev); + void (*end_context_switch)(struct task_struct *next); }; struct pv_irq_ops { @@ -1414,20 +1416,21 @@ enum paravirt_lazy_mode { }; enum paravirt_lazy_mode paravirt_get_lazy_mode(void); -void paravirt_enter_lazy_cpu(void); -void paravirt_leave_lazy_cpu(void); +void paravirt_start_context_switch(struct task_struct *prev); +void paravirt_end_context_switch(struct task_struct *next); + void paravirt_enter_lazy_mmu(void); void paravirt_leave_lazy_mmu(void); #define __HAVE_ARCH_START_CONTEXT_SWITCH -static inline void arch_start_context_switch(void) +static inline void arch_start_context_switch(struct task_struct *prev) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); + PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); } -static inline void arch_end_context_switch(void) +static inline void arch_end_context_switch(struct task_struct *next) { - PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); + PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); } #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d0812e1..24e4283 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -83,6 +83,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) #define pte_val(x) native_pte_val(x) #define __pte(x) native_make_pte(x) +#define arch_end_context_switch(prev) do {} while(0) + #endif /* CONFIG_PARAVIRT */ /* diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 430a0e3..cf14375 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -270,20 +270,20 @@ void paravirt_leave_lazy_mmu(void) leave_lazy(PARAVIRT_LAZY_MMU); } -void paravirt_enter_lazy_cpu(void) +void paravirt_start_context_switch(struct task_struct *prev) { if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { arch_leave_lazy_mmu_mode(); - set_thread_flag(TIF_LAZY_MMU_UPDATES); + set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); } enter_lazy(PARAVIRT_LAZY_CPU); } -void paravirt_leave_lazy_cpu(void) +void paravirt_end_context_switch(struct task_struct *next) { leave_lazy(PARAVIRT_LAZY_CPU); - if (test_and_clear_thread_flag(TIF_LAZY_MMU_UPDATES)) + if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) arch_enter_lazy_mmu_mode(); } @@ -399,10 +399,8 @@ struct pv_cpu_ops pv_cpu_ops = { .set_iopl_mask = native_set_iopl_mask, .io_delay = native_io_delay, - .lazy_mode = { - .enter = paravirt_nop, - .leave = paravirt_nop, - }, + .start_context_switch = paravirt_nop, + .end_context_switch = paravirt_nop, }; struct pv_apic_ops pv_apic_ops = { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 57e49a8..d766c76 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -407,7 +407,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_end_context_switch(); + arch_end_context_switch(next_p); /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 7115e60..e8a9aaf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -428,7 +428,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * done before math_state_restore, so the TS bit is up * to date. */ - arch_end_context_switch(); + arch_end_context_switch(next_p); /* * Switch FS and GS. diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 950929c..55a5d69 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -467,16 +467,16 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, } #endif -static void vmi_enter_lazy_cpu(void) +static void vmi_start_context_switch(struct task_struct *prev) { - paravirt_enter_lazy_cpu(); + paravirt_start_context_switch(prev); vmi_ops.set_lazy_mode(2); } -static void vmi_leave_lazy_cpu(void) +static void vmi_end_context_switch(struct task_struct *next) { vmi_ops.set_lazy_mode(0); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } static void vmi_enter_lazy_mmu(void) @@ -722,9 +722,9 @@ static inline int __init activate_vmi(void) para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); para_fill(pv_cpu_ops.io_delay, IODelay); - para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, + para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, set_lazy_mode, SetLazyMode); - para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy_cpu, + para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, set_lazy_mode, SetLazyMode); para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 41a5562..5287081 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -153,10 +153,10 @@ static void lguest_leave_lazy_mmu_mode(void) paravirt_leave_lazy_mmu(); } -static void lguest_leave_lazy_cpu_mode(void) +static void lguest_end_context_switch(struct task_struct *next) { hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } /*G:033 @@ -1031,8 +1031,8 @@ __init void lguest_init(void) pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; pv_cpu_ops.wbinvd = lguest_wbinvd; - pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; - pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_cpu_mode; + pv_cpu_ops.start_context_switch = paravirt_start_context_switch; + pv_cpu_ops.end_context_switch = lguest_end_context_switch; /* pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f586e63..70b355d3a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -203,10 +203,10 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static void xen_leave_lazy_cpu(void) +static void xen_end_context_switch(struct task_struct *next) { xen_mc_flush(); - paravirt_leave_lazy_cpu(); + paravirt_end_context_switch(next); } static unsigned long xen_store_tr(void) @@ -817,10 +817,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { /* Xen takes care of %gs when switching to usermode for us */ .swapgs = paravirt_nop, - .lazy_mode = { - .enter = paravirt_enter_lazy_cpu, - .leave = xen_leave_lazy_cpu, - }, + .start_context_switch = paravirt_start_context_switch, + .end_context_switch = xen_end_context_switch, }; static const struct pv_apic_ops xen_apic_ops __initdata = { diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index 235e34a..0988704 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -74,7 +74,7 @@ static inline int pte_file(pte_t pte) { return 0; } #define arch_enter_lazy_mmu_mode() do {} while (0) #define arch_leave_lazy_mmu_mode() do {} while (0) -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #else /* !CONFIG_MMU */ /*****************************************************************************/ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 922f036..e410f60 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -291,7 +291,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, * definition. */ #ifndef __HAVE_ARCH_START_CONTEXT_SWITCH -#define arch_start_context_switch() do {} while (0) +#define arch_start_context_switch(prev) do {} while (0) #endif #ifndef __HAVE_PFNMAP_TRACKING diff --git a/kernel/sched.c b/kernel/sched.c index 7530fdd..133762a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2746,7 +2746,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * combine the page table reload and the switch backend into * one hypercall. */ - arch_start_context_switch(); + arch_start_context_switch(prev); if (unlikely(!mm)) { next->active_mm = oldmm; -- cgit v0.10.2 From 2829b449276aed45f3d649efb21e3418e39dd5d1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Feb 2009 23:53:19 -0800 Subject: x86/paravirt: allow preemption with lazy mmu mode Impact: remove obsolete checks, simplification Lift restrictions on preemption with lazy mmu mode, as it is now allowed. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index cf14375..bf2e86e 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -247,7 +247,6 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA static inline void enter_lazy(enum paravirt_lazy_mode mode) { BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - BUG_ON(preemptible()); __get_cpu_var(paravirt_lazy_mode) = mode; } @@ -255,7 +254,6 @@ static inline void enter_lazy(enum paravirt_lazy_mode mode) static void leave_lazy(enum paravirt_lazy_mode mode) { BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); - BUG_ON(preemptible()); __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; } @@ -272,6 +270,8 @@ void paravirt_leave_lazy_mmu(void) void paravirt_start_context_switch(struct task_struct *prev) { + BUG_ON(preemptible()); + if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { arch_leave_lazy_mmu_mode(); set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); @@ -281,6 +281,8 @@ void paravirt_start_context_switch(struct task_struct *prev) void paravirt_end_context_switch(struct task_struct *next) { + BUG_ON(preemptible()); + leave_lazy(PARAVIRT_LAZY_CPU); if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) @@ -300,7 +302,6 @@ void arch_flush_lazy_mmu_mode(void) preempt_disable(); if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { - WARN_ON(preempt_count() == 1); arch_leave_lazy_mmu_mode(); arch_enter_lazy_mmu_mode(); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f5f8faa..3f2d0fe 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -419,10 +419,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - /* updates to init_mm may be done without lock */ - if (mm == &init_mm) - preempt_disable(); - ADD_STATS(set_pte_at, 1); // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); ADD_STATS(set_pte_at_current, mm == current->mm); @@ -443,9 +439,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, } xen_set_pte(ptep, pteval); -out: - if (mm == &init_mm) - preempt_enable(); +out: return; } pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, -- cgit v0.10.2 From 252a6bf2a3a7e7add56b17d48aecf3f3ef213103 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 00:11:28 -0800 Subject: mm: allow preemption in apply_to_pte_range Impact: allow preemption in apply_to_pte_range updates to init_mm Preemption is now allowed for lazy mmu mode, so don't disable it for the inner loop of apply_to_pte_range. This only applies when doing updates to init_mm; user pagetables are still modified under the pte lock, so preemption is disabled anyway. Signed-off-by: Jeremy Fitzhardinge Acked-by: Peter Zijlstra diff --git a/mm/memory.c b/mm/memory.c index b80cc31..baa999e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1718,7 +1718,6 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, BUG_ON(pmd_huge(*pmd)); - preempt_disable(); arch_enter_lazy_mmu_mode(); token = pmd_pgtable(*pmd); @@ -1730,7 +1729,6 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - preempt_enable(); if (mm != &init_mm) pte_unmap_unlock(pte-1, ptl); -- cgit v0.10.2 From ab2f75f0b760d2b0c9a875b669a1b51dce02c85a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 18 Feb 2009 00:18:50 -0800 Subject: x86/paravirt: use percpu_ rather than __get_cpu_var Impact: minor optimisation percpu_read/write is a slightly more direct way of getting to percpu data. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index bf2e86e..254e8aa 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -246,16 +246,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA static inline void enter_lazy(enum paravirt_lazy_mode mode) { - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); + BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); - __get_cpu_var(paravirt_lazy_mode) = mode; + percpu_write(paravirt_lazy_mode, mode); } static void leave_lazy(enum paravirt_lazy_mode mode) { - BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); + BUG_ON(percpu_read(paravirt_lazy_mode) != mode); - __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; + percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); } void paravirt_enter_lazy_mmu(void) @@ -294,7 +294,7 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) if (in_interrupt()) return PARAVIRT_LAZY_NONE; - return __get_cpu_var(paravirt_lazy_mode); + return percpu_read(paravirt_lazy_mode); } void arch_flush_lazy_mmu_mode(void) -- cgit v0.10.2 From 5caecb9432428241d0c641897f07ff4003f1b55f Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 20 Feb 2009 23:01:26 -0800 Subject: xen: disable preempt for leave_lazy_mmu xen_mc_flush() requires preemption to be disabled for its own sanity, so disable it while we're flushing. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3f2d0fe..0e57238 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1812,8 +1812,10 @@ __init void xen_post_allocator_init(void) static void xen_leave_lazy_mmu(void) { + preempt_disable(); xen_mc_flush(); paravirt_leave_lazy_mmu(); + preempt_enable(); } const struct pv_mmu_ops xen_mmu_ops __initdata = { -- cgit v0.10.2 From 59d7187142bbe9b404a403ed0f874d3227305f26 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 26 Feb 2009 15:48:33 -0800 Subject: xen: separate p2m allocation from setting When doing very early p2m setting, we need to separate setting from allocation, so split things up accordingly. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 0e57238..e0a55b7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -233,47 +233,74 @@ unsigned long get_phys_to_machine(unsigned long pfn) } EXPORT_SYMBOL_GPL(get_phys_to_machine); -static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) +/* install a new p2m_top page */ +bool install_p2mtop_page(unsigned long pfn, unsigned long *p) { - unsigned long *p; + unsigned topidx = p2m_top_index(pfn); + unsigned long **pfnp, *mfnp; unsigned i; - p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); - BUG_ON(p == NULL); + pfnp = &p2m_top[topidx]; + mfnp = &p2m_top_mfn[topidx]; for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) p[i] = INVALID_P2M_ENTRY; - if (cmpxchg(pp, p2m_missing, p) != p2m_missing) - free_page((unsigned long)p); - else + if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { *mfnp = virt_to_mfn(p); + return true; + } + + return false; } -void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +static void alloc_p2m(unsigned long pfn) { - unsigned topidx, idx; + unsigned long *p; - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + if (!install_p2mtop_page(pfn, p)) + free_page((unsigned long)p); +} + +/* Try to install p2m mapping; fail if intermediate bits missing */ +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { BUG_ON(mfn != INVALID_P2M_ENTRY); - return; + return true; } topidx = p2m_top_index(pfn); if (p2m_top[topidx] == p2m_missing) { - /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) - return; - alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); + return true; + return false; } idx = p2m_index(pfn); p2m_top[topidx][idx] = mfn; + + return true; +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { + alloc_p2m(pfn); + + if (!__set_phys_to_machine(pfn, mfn)) + BUG(); + } } unsigned long arbitrary_virt_to_mfn(void *vaddr) diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 24d1b44..da73026 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -11,6 +11,9 @@ enum pt_level { }; +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +bool install_p2mtop_page(unsigned long pfn, unsigned long *p); + void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); -- cgit v0.10.2 From a2bcd4731f77cb77ae4b5e4a3d7f5471cf346c33 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 29 Mar 2009 23:47:48 +0200 Subject: x86/mm: further cleanups of fault.c's include file section Impact: cleanup Eliminate more than 20 unnecessary #include lines in fault.c Also fix include file dependency bug in asm/traps.h. (this was masked before, by implicit inclusion) Signed-off-by: Ingo Molnar LKML-Reference: Acked-by: H. Peter Anvin diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d53425..37fb07a 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -2,6 +2,7 @@ #define _ASM_X86_TRAPS_H #include +#include /* TRAP_TRACE, ... */ #ifdef CONFIG_X86_32 #define dotraplinkage diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b727..24a36a6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -3,40 +3,16 @@ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include +#include /* STACK_END_MAGIC */ +#include /* test_thread_flag(), ... */ +#include /* oops_begin/end, ... */ +#include /* search_exception_table */ +#include /* max_low_pfn */ +#include /* __kprobes, ... */ +#include /* kmmio_handler, ... */ + +#include /* dotraplinkage, ... */ +#include /* pgd_*(), ... */ /* * Page fault error code bits: -- cgit v0.10.2 From 7571a60446030d2576d881438447e86a0755a83b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 15:34:59 -0800 Subject: xen: split construction of p2m mfn tables from registration Build the p2m_mfn_list_list early with the rest of the p2m table, but register it later when the real shared_info structure is in place. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e0a55b7..67d2ab4 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -void xen_setup_mfn_list_list(void) +static void __init xen_build_mfn_list_list(void) { unsigned pfn, idx; @@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void) unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); } +} +void xen_setup_mfn_list_list(void) +{ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = @@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top[topidx] = &mfn_list[pfn]; } + + xen_build_mfn_list_list(); } unsigned long get_phys_to_machine(unsigned long pfn) -- cgit v0.10.2 From 6ed6bf428aff64fe37cdc54b239d598fee6016f1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 13:02:18 -0800 Subject: xen: clean up xen_load_gdt Makes the logic a bit clearer. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 70b355d3a..5776dc2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -301,10 +301,21 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames = mcs.args; for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = arbitrary_virt_to_mfn((void *)va); + int level; + pte_t *ptep = lookup_address(va, &level); + unsigned long pfn, mfn; + void *virt; + + BUG_ON(ptep == NULL); + + pfn = pte_pfn(*ptep); + mfn = pfn_to_mfn(pfn); + virt = __va(PFN_PHYS(pfn)); + + frames[f] = mfn; make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(mfn_to_virt(frames[f])); + make_lowmem_page_readonly(virt); } MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); -- cgit v0.10.2 From 3ce5fa7ebff74b6a4dc5fdcdc22e6979f5a4ff85 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 15:26:00 -0800 Subject: xen: make xen_load_gdt simpler Remove use of multicall machinery which is unused (gdt loading is never performance critical). This removes the implicit use of percpu variables, which simplifies understanding how the percpu code's use of load_gdt interacts with this code. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5776dc2..48b399b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -284,12 +284,11 @@ static void xen_set_ldt(const void *addr, unsigned entries) static void xen_load_gdt(const struct desc_ptr *dtr) { - unsigned long *frames; unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long frames[pages]; int f; - struct multicall_space mcs; /* A GDT can be up to 64k in size, which corresponds to 8192 8-byte entries, or 16 4k pages.. */ @@ -297,9 +296,6 @@ static void xen_load_gdt(const struct desc_ptr *dtr) BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); - mcs = xen_mc_entry(sizeof(*frames) * pages); - frames = mcs.args; - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { int level; pte_t *ptep = lookup_address(va, &level); @@ -314,13 +310,15 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames[f] = mfn; + printk("xen_load_gdt: %d va=%p mfn=%lx pfn=%lx va'=%p\n", + f, (void *)va, mfn, pfn, virt); + make_lowmem_page_readonly((void *)va); make_lowmem_page_readonly(virt); } - MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); - - xen_mc_issue(PARAVIRT_LAZY_CPU); + if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + BUG(); } static void load_TLS_descriptor(struct thread_struct *t, -- cgit v0.10.2 From b4b7e58590d0e94ed78bd6be1aa163caba7b6c74 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 16:34:27 -0800 Subject: xen: remove xen_load_gdt debug Don't need the noise. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 48b399b..75b7a0f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -310,9 +310,6 @@ static void xen_load_gdt(const struct desc_ptr *dtr) frames[f] = mfn; - printk("xen_load_gdt: %d va=%p mfn=%lx pfn=%lx va'=%p\n", - f, (void *)va, mfn, pfn, virt); - make_lowmem_page_readonly((void *)va); make_lowmem_page_readonly(virt); } -- cgit v0.10.2 From e9e2d1ffcfdb38bed11a3064aa74bea9ee38ed80 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Thu, 5 Mar 2009 20:13:57 +0100 Subject: NULL noise: arch/x86/xen/smp.c Fix this sparse warnings: arch/x86/xen/smp.c:316:52: warning: Using plain integer as NULL pointer arch/x86/xen/smp.c:421:60: warning: Using plain integer as NULL pointer Signed-off-by: Hannes Eder Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 8d47056..304d832 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) BUG_ON(rc); while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); barrier(); } @@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) /* Make sure other vcpus get a chance to run if they need to. */ for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); break; } } -- cgit v0.10.2 From e826fe1ba1563a9272345da8e3279a930ac160a7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Mar 2009 17:09:27 -0800 Subject: xen: mask XSAVE from cpuid Xen leaves XSAVE set in cpuid, but doesn't allow cr4.OSXSAVE to be set. This confuses the kernel and it ends up crashing on an xsetbv instruction. At boot time, try to set cr4.OSXSAVE, and mask XSAVE out of cpuid it we can't. This will produce a spurious error from Xen, but allows us to support XSAVE if/when Xen does. This also factors out the cpuid mask decisions to boot time. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 75b7a0f..da33e0c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -168,21 +168,23 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskecx = ~0; unsigned maskedx = ~0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ - if (*ax == 1) - maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ - (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + if (*ax == 1) { + maskecx = cpuid_leaf1_ecx_mask; + maskedx = cpuid_leaf1_edx_mask; + } asm(XEN_EMULATE_PREFIX "cpuid" : "=a" (*ax), @@ -190,9 +192,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=c" (*cx), "=d" (*dx) : "0" (*ax), "2" (*cx)); + + *cx &= maskecx; *dx &= maskedx; } +static __init void xen_init_cpuid_mask(void) +{ + unsigned int ax, bx, cx, dx; + + cpuid_leaf1_edx_mask = + ~((1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + + if (!xen_initial_domain()) + cpuid_leaf1_edx_mask &= + ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ + (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + + ax = 1; + xen_cpuid(&ax, &bx, &cx, &dx); + + /* cpuid claims we support xsave; try enabling it to see what happens */ + if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { + unsigned long cr4; + + set_in_cr4(X86_CR4_OSXSAVE); + + cr4 = read_cr4(); + + if ((cr4 & X86_CR4_OSXSAVE) == 0) + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); + + clear_in_cr4(X86_CR4_OSXSAVE); + } +} + static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -901,6 +937,8 @@ asmlinkage void __init xen_start_kernel(void) xen_init_irq_ops(); + xen_init_cpuid_mask(); + #ifdef CONFIG_X86_LOCAL_APIC /* * set up the basic apic ops. -- cgit v0.10.2 From 68509cdcde6583ee1a9542899d1270449c7d5903 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 8 Mar 2009 03:59:04 -0700 Subject: x86-64: remove PGE from must-have feature list PGE may not be available when running paravirtualized, so test the cpuid bit before using it. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d5cd6c5..a4737dd 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -50,7 +50,7 @@ #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) +#define NEED_PGE 0 #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -- cgit v0.10.2 From 1e7449730853e7c9ae9a2458b2ced7ba12559a0e Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 9 Feb 2009 12:05:46 -0800 Subject: Xen: Add virt_to_pfn helper function Signed-off-by: Alex Nixon diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a918dd..018a0a4 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) static inline unsigned long pte_mfn(pte_t pte) -- cgit v0.10.2 From 5f241e65f2be4661a33e1937e1c829252a80b2b8 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 16 Mar 2009 17:08:48 -0700 Subject: x86-64: non-paravirt systems always has PSE and PGE A paravirtualized system may not have PSE or PGE available to guests, so they are not required features. However, without paravirt we can assume that any x86-64 implementation will have them available. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index a4737dd..64cf2d2 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -48,9 +48,15 @@ #endif #ifdef CONFIG_X86_64 +#ifdef CONFIG_PARAVIRT +/* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 -#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_PGE 0 +#else +#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31) +#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31) +#endif +#define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) -- cgit v0.10.2 From 4185f35404dc96f8525298c7c548aee419f3b3f4 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 17 Mar 2009 13:30:55 -0700 Subject: xen/mmu: some early pagetable cleanups 1. make sure early-allocated ptes are pinned, so they can be later unpinned 2. don't pin pmd+pud, just make them RO 3. scatter some __inits around Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 67d2ab4..df87c80 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1013,7 +1013,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -void __init xen_mark_init_mm_pinned(void) +static void __init xen_mark_init_mm_pinned(void) { xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1461,6 +1461,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) } #endif +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) @@ -1469,22 +1478,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) BUG_ON(mem_map); /* should only be used early */ #endif make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); +} + +/* Used for pmd and pud */ +static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) +{ +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); } /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) +static __init void xen_release_pte_init(unsigned long pfn) { + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +static __init void xen_release_pmd_init(unsigned long pfn) { - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } /* This needs to make sure the new pte page is pinned iff its being @@ -1873,9 +1889,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd = xen_alloc_pmd_init, .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, + .release_pmd = xen_release_pmd_init, #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = xen_kmap_atomic_pte, @@ -1914,8 +1930,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, + .alloc_pud = xen_alloc_pmd_init, + .release_pud = xen_release_pmd_init, #endif /* PAGETABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index f897cdf..5c50a10 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -56,8 +56,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); -void xen_mark_init_mm_pinned(void); - void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP -- cgit v0.10.2 From 8de07bbdede03598801cf33ab23dcbcd28a918d2 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 4 Mar 2009 17:36:57 -0800 Subject: xen/mmu: weaken flush_tlb_other test Impact: fixes crashing bug There's no particular problem with getting an empty cpu mask, so just shortcut-return if we get one. Avoids crash reported by Christophe Saout Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index df87c80..e425a32 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1293,8 +1293,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, } *args; struct multicall_space mcs; - BUG_ON(cpumask_empty(cpus)); - BUG_ON(!mm); + if (cpumask_empty(cpus)) + return; /* nothing to do */ mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; -- cgit v0.10.2 From 1e6fcf840e11ceff8a656a678c6e4b0560a98e08 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 25 Mar 2009 17:46:42 +0000 Subject: xen: resume interrupts before system devices. Impact: bugfix Xen domain restore Otherwise the first timer interrupt after resume is missed and we never get another. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 3ccd348..b703dd2 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -68,15 +68,15 @@ static int xen_suspend(void *data) gnttab_resume(); xen_mm_unpin_all(); - sysdev_resume(); - device_power_up(PMSG_RESUME); - if (!*cancelled) { xen_irq_resume(); xen_console_resume(); xen_timer_resume(); } + sysdev_resume(); + device_power_up(PMSG_RESUME); + return 0; } -- cgit v0.10.2 From 707ebbc81c61eb480d8a51ca61e355e240df1d32 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Mar 2009 11:29:02 -0700 Subject: xen: set _PAGE_NX in __supported_pte_mask before pagetable construction Some 64-bit machines don't support the NX flag in ptes. Check for NX before constructing the kernel pagetables. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index da33e0c..80f4c53 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -912,7 +913,6 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -980,6 +980,11 @@ asmlinkage void __init xen_start_kernel(void) if (!xen_initial_domain()) __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); +#ifdef CONFIG_X86_64 + /* Work out if we support NX */ + check_efer(); +#endif + /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -- cgit v0.10.2 From 6d02c42698f99eccb290ac53d4f10ca883b9f90c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 29 Mar 2009 22:57:15 -0700 Subject: xen: clean up gate trap/interrupt constants Use GATE_INTERRUPT/TRAP rather than 0xe/f. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 80f4c53..12a3159 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -428,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - if (val->type != 0xf && val->type != 0xe) + if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) return 0; info->vector = vector; @@ -436,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ - if (val->type == 0xe) - info->flags |= 4; + if (val->type == GATE_INTERRUPT) + info->flags |= 1 << 2; return 1; } -- cgit v0.10.2 From d4c045364d3107603187f21a56ec231e74d26441 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:20:31 -0800 Subject: xen: add irq_from_evtchn Given an evtchn, return the corresponding irq. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30963af..1cd2a0e 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq) return info_for_irq(irq)->evtchn; } +unsigned irq_from_evtchn(unsigned int evtchn) +{ + return evtchn_to_irq[evtchn]; +} +EXPORT_SYMBOL_GPL(irq_from_evtchn); + static enum ipi_vector ipi_from_irq(unsigned irq) { struct irq_info *info = info_for_irq(irq); diff --git a/include/xen/events.h b/include/xen/events.h index 0d5f1ad..e68d59a 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq); irq will be disabled so it won't deliver an interrupt. */ void xen_poll_irq(int irq); +/* Determine the IRQ which is bound to an event channel */ +unsigned irq_from_evtchn(unsigned int evtchn); + #endif /* _XEN_EVENTS_H */ -- cgit v0.10.2 From f7116284c734f3a47180cd9c907944a1837ccb3c Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:21:19 -0800 Subject: xen: add /dev/xen/evtchn driver This driver is used by application which wish to receive notifications from the hypervisor or other guests via Xen's event channel mechanism. In particular it is used by the xenstore daemon in domain 0. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 526187c..1bbb910 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES secure, but slightly less efficient. If in doubt, say yes. +config XEN_DEV_EVTCHN + tristate "Xen /dev/xen/evtchn device" + depends on XEN + default y + help + The evtchn driver allows a userspace process to triger event + channels and to receive notification of an event channel + firing. + If in doubt, say yes. + config XENFS tristate "Xen filesystem" depends on XEN diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ff8accc..1567639 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -4,4 +4,5 @@ obj-y += xenbus/ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file +obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o +obj-$(CONFIG_XENFS) += xenfs/ diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c new file mode 100644 index 0000000..517b9ee --- /dev/null +++ b/drivers/xen/evtchn.c @@ -0,0 +1,494 @@ +/****************************************************************************** + * evtchn.c + * + * Driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004-2005, K A Fraser + * Multi-process extensions Copyright (c) 2004, Steven Smith + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct per_user_data { + /* Notification ring, accessed via /dev/xen/evtchn. */ +#define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) + evtchn_port_t *ring; + unsigned int ring_cons, ring_prod, ring_overflow; + struct mutex ring_cons_mutex; /* protect against concurrent readers */ + + /* Processes wait on this queue when ring is empty. */ + wait_queue_head_t evtchn_wait; + struct fasync_struct *evtchn_async_queue; + const char *name; +}; + +/* Who's bound to each port? */ +static struct per_user_data *port_user[NR_EVENT_CHANNELS]; +static DEFINE_SPINLOCK(port_user_lock); + +irqreturn_t evtchn_interrupt(int irq, void *data) +{ + unsigned int port = (unsigned long)data; + struct per_user_data *u; + + spin_lock(&port_user_lock); + + u = port_user[port]; + + disable_irq_nosync(irq); + + if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { + u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; + wmb(); /* Ensure ring contents visible */ + if (u->ring_cons == u->ring_prod++) { + wake_up_interruptible(&u->evtchn_wait); + kill_fasync(&u->evtchn_async_queue, + SIGIO, POLL_IN); + } + } else { + u->ring_overflow = 1; + } + + spin_unlock(&port_user_lock); + + return IRQ_HANDLED; +} + +static ssize_t evtchn_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int rc; + unsigned int c, p, bytes1 = 0, bytes2 = 0; + struct per_user_data *u = file->private_data; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + if (count == 0) + return 0; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + for (;;) { + mutex_lock(&u->ring_cons_mutex); + + rc = -EFBIG; + if (u->ring_overflow) + goto unlock_out; + + c = u->ring_cons; + p = u->ring_prod; + if (c != p) + break; + + mutex_unlock(&u->ring_cons_mutex); + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + rc = wait_event_interruptible(u->evtchn_wait, + u->ring_cons != u->ring_prod); + if (rc) + return rc; + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if (((c ^ p) & EVTCHN_RING_SIZE) != 0) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * + sizeof(evtchn_port_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t); + } else { + bytes1 = (p - c) * sizeof(evtchn_port_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. */ + if (bytes1 > count) { + bytes1 = count; + bytes2 = 0; + } else if ((bytes1 + bytes2) > count) { + bytes2 = count - bytes1; + } + + rc = -EFAULT; + rmb(); /* Ensure that we see the port before we copy it. */ + if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) || + ((bytes2 != 0) && + copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) + goto unlock_out; + + u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); + rc = bytes1 + bytes2; + + unlock_out: + mutex_unlock(&u->ring_cons_mutex); + return rc; +} + +static ssize_t evtchn_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + int rc, i; + evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + struct per_user_data *u = file->private_data; + + if (kbuf == NULL) + return -ENOMEM; + + /* Whole number of ports. */ + count &= ~(sizeof(evtchn_port_t)-1); + + rc = 0; + if (count == 0) + goto out; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + rc = -EFAULT; + if (copy_from_user(kbuf, buf, count) != 0) + goto out; + + spin_lock_irq(&port_user_lock); + for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) + if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u)) + enable_irq(irq_from_evtchn(kbuf[i])); + spin_unlock_irq(&port_user_lock); + + rc = count; + + out: + free_page((unsigned long)kbuf); + return rc; +} + +static int evtchn_bind_to_user(struct per_user_data *u, int port) +{ + int irq; + int rc = 0; + + spin_lock_irq(&port_user_lock); + + BUG_ON(port_user[port] != NULL); + + irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, + u->name, (void *)(unsigned long)port); + if (rc < 0) + goto fail; + + port_user[port] = u; + +fail: + spin_unlock_irq(&port_user_lock); + return rc; +} + +static void evtchn_unbind_from_user(struct per_user_data *u, int port) +{ + int irq = irq_from_evtchn(port); + + unbind_from_irqhandler(irq, (void *)(unsigned long)port); + port_user[port] = NULL; +} + +static long evtchn_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + int rc; + struct per_user_data *u = file->private_data; + void __user *uarg = (void __user *) arg; + + switch (cmd) { + case IOCTL_EVTCHN_BIND_VIRQ: { + struct ioctl_evtchn_bind_virq bind; + struct evtchn_bind_virq bind_virq; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_virq.virq = bind.virq; + bind_virq.vcpu = 0; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, bind_virq.port); + if (rc == 0) + rc = bind_virq.port; + break; + } + + case IOCTL_EVTCHN_BIND_INTERDOMAIN: { + struct ioctl_evtchn_bind_interdomain bind; + struct evtchn_bind_interdomain bind_interdomain; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + bind_interdomain.remote_dom = bind.remote_domain; + bind_interdomain.remote_port = bind.remote_port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, bind_interdomain.local_port); + if (rc == 0) + rc = bind_interdomain.local_port; + break; + } + + case IOCTL_EVTCHN_BIND_UNBOUND_PORT: { + struct ioctl_evtchn_bind_unbound_port bind; + struct evtchn_alloc_unbound alloc_unbound; + + rc = -EFAULT; + if (copy_from_user(&bind, uarg, sizeof(bind))) + break; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = bind.remote_domain; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (rc != 0) + break; + + rc = evtchn_bind_to_user(u, alloc_unbound.port); + if (rc == 0) + rc = alloc_unbound.port; + break; + } + + case IOCTL_EVTCHN_UNBIND: { + struct ioctl_evtchn_unbind unbind; + + rc = -EFAULT; + if (copy_from_user(&unbind, uarg, sizeof(unbind))) + break; + + rc = -EINVAL; + if (unbind.port >= NR_EVENT_CHANNELS) + break; + + spin_lock_irq(&port_user_lock); + + rc = -ENOTCONN; + if (port_user[unbind.port] != u) { + spin_unlock_irq(&port_user_lock); + break; + } + + evtchn_unbind_from_user(u, unbind.port); + + spin_unlock_irq(&port_user_lock); + + rc = 0; + break; + } + + case IOCTL_EVTCHN_NOTIFY: { + struct ioctl_evtchn_notify notify; + + rc = -EFAULT; + if (copy_from_user(¬ify, uarg, sizeof(notify))) + break; + + if (notify.port >= NR_EVENT_CHANNELS) { + rc = -EINVAL; + } else if (port_user[notify.port] != u) { + rc = -ENOTCONN; + } else { + notify_remote_via_evtchn(notify.port); + rc = 0; + } + break; + } + + case IOCTL_EVTCHN_RESET: { + /* Initialise the ring to empty. Clear errors. */ + mutex_lock(&u->ring_cons_mutex); + spin_lock_irq(&port_user_lock); + u->ring_cons = u->ring_prod = u->ring_overflow = 0; + spin_unlock_irq(&port_user_lock); + mutex_unlock(&u->ring_cons_mutex); + rc = 0; + break; + } + + default: + rc = -ENOSYS; + break; + } + + return rc; +} + +static unsigned int evtchn_poll(struct file *file, poll_table *wait) +{ + unsigned int mask = POLLOUT | POLLWRNORM; + struct per_user_data *u = file->private_data; + + poll_wait(file, &u->evtchn_wait, wait); + if (u->ring_cons != u->ring_prod) + mask |= POLLIN | POLLRDNORM; + if (u->ring_overflow) + mask = POLLERR; + return mask; +} + +static int evtchn_fasync(int fd, struct file *filp, int on) +{ + struct per_user_data *u = filp->private_data; + return fasync_helper(fd, filp, on, &u->evtchn_async_queue); +} + +static int evtchn_open(struct inode *inode, struct file *filp) +{ + struct per_user_data *u; + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm); + if (u->name == NULL) { + kfree(u); + return -ENOMEM; + } + + init_waitqueue_head(&u->evtchn_wait); + + u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL); + if (u->ring == NULL) { + kfree(u->name); + kfree(u); + return -ENOMEM; + } + + mutex_init(&u->ring_cons_mutex); + + filp->private_data = u; + + return 0; +} + +static int evtchn_release(struct inode *inode, struct file *filp) +{ + int i; + struct per_user_data *u = filp->private_data; + + spin_lock_irq(&port_user_lock); + + free_page((unsigned long)u->ring); + + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + if (port_user[i] != u) + continue; + + evtchn_unbind_from_user(port_user[i], i); + } + + spin_unlock_irq(&port_user_lock); + + kfree(u->name); + kfree(u); + + return 0; +} + +static const struct file_operations evtchn_fops = { + .owner = THIS_MODULE, + .read = evtchn_read, + .write = evtchn_write, + .unlocked_ioctl = evtchn_ioctl, + .poll = evtchn_poll, + .fasync = evtchn_fasync, + .open = evtchn_open, + .release = evtchn_release, +}; + +static struct miscdevice evtchn_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "evtchn", + .fops = &evtchn_fops, +}; +static int __init evtchn_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + spin_lock_init(&port_user_lock); + memset(port_user, 0, sizeof(port_user)); + + /* Create '/dev/misc/evtchn'. */ + err = misc_register(&evtchn_miscdev); + if (err != 0) { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + printk(KERN_INFO "Event-channel device installed.\n"); + + return 0; +} + +static void __exit evtchn_cleanup(void) +{ + misc_deregister(&evtchn_miscdev); +} + +module_init(evtchn_init); +module_exit(evtchn_cleanup); + +MODULE_LICENSE("GPL"); diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h new file mode 100644 index 0000000..14e833e --- /dev/null +++ b/include/xen/evtchn.h @@ -0,0 +1,88 @@ +/****************************************************************************** + * evtchn.h + * + * Interface to /dev/xen/evtchn. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __LINUX_PUBLIC_EVTCHN_H__ +#define __LINUX_PUBLIC_EVTCHN_H__ + +/* + * Bind a fresh port to VIRQ @virq. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) +struct ioctl_evtchn_bind_virq { + unsigned int virq; +}; + +/* + * Bind a fresh port to remote <@remote_domain, @remote_port>. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; +}; + +/* + * Allocate a fresh port for binding to @remote_domain. + * Return allocated port. + */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_UNBIND \ + _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) +struct ioctl_evtchn_unbind { + unsigned int port; +}; + +/* + * Unbind previously allocated @port. + */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) +struct ioctl_evtchn_notify { + unsigned int port; +}; + +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IOC(_IOC_NONE, 'E', 5, 0) + +#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ -- cgit v0.10.2 From c5cfef0f79cacc3aa438fc28f4747f0d10c54d0d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Fri, 6 Feb 2009 19:21:19 -0800 Subject: xen: export ioctl headers to userspace Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/include/Kbuild b/include/Kbuild index d8c3e3c..fe36acc 100644 --- a/include/Kbuild +++ b/include/Kbuild @@ -8,3 +8,4 @@ header-y += mtd/ header-y += rdma/ header-y += video/ header-y += drm/ +header-y += xen/ diff --git a/include/xen/Kbuild b/include/xen/Kbuild new file mode 100644 index 0000000..4e65c16 --- /dev/null +++ b/include/xen/Kbuild @@ -0,0 +1 @@ +header-y += evtchn.h -- cgit v0.10.2 From 0a4666b539a0e896ec4e8396a034a479e3573125 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Feb 2009 13:03:24 -0800 Subject: xen/dev-evtchn: clean up locking in evtchn Define a new per_user_data mutex to serialize bind/unbind operations to prevent them from racing with each other. Fix error returns and don't do a bind while holding a spinlock. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index 517b9ee..af03195 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -54,6 +54,8 @@ #include struct per_user_data { + struct mutex bind_mutex; /* serialize bind/unbind operations */ + /* Notification ring, accessed via /dev/xen/evtchn. */ #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) #define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) @@ -69,7 +71,7 @@ struct per_user_data { /* Who's bound to each port? */ static struct per_user_data *port_user[NR_EVENT_CHANNELS]; -static DEFINE_SPINLOCK(port_user_lock); +static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ irqreturn_t evtchn_interrupt(int irq, void *data) { @@ -210,22 +212,24 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf, static int evtchn_bind_to_user(struct per_user_data *u, int port) { - int irq; int rc = 0; - spin_lock_irq(&port_user_lock); - + /* + * Ports are never reused, so every caller should pass in a + * unique port. + * + * (Locking not necessary because we haven't registered the + * interrupt handler yet, and our caller has already + * serialized bind operations.) + */ BUG_ON(port_user[port] != NULL); - - irq = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, - u->name, (void *)(unsigned long)port); - if (rc < 0) - goto fail; - port_user[port] = u; -fail: - spin_unlock_irq(&port_user_lock); + rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, + u->name, (void *)(unsigned long)port); + if (rc >= 0) + rc = 0; + return rc; } @@ -234,6 +238,10 @@ static void evtchn_unbind_from_user(struct per_user_data *u, int port) int irq = irq_from_evtchn(port); unbind_from_irqhandler(irq, (void *)(unsigned long)port); + + /* make sure we unbind the irq handler before clearing the port */ + barrier(); + port_user[port] = NULL; } @@ -244,6 +252,9 @@ static long evtchn_ioctl(struct file *file, struct per_user_data *u = file->private_data; void __user *uarg = (void __user *) arg; + /* Prevent bind from racing with unbind */ + mutex_lock(&u->bind_mutex); + switch (cmd) { case IOCTL_EVTCHN_BIND_VIRQ: { struct ioctl_evtchn_bind_virq bind; @@ -368,6 +379,7 @@ static long evtchn_ioctl(struct file *file, rc = -ENOSYS; break; } + mutex_unlock(&u->bind_mutex); return rc; } @@ -414,6 +426,7 @@ static int evtchn_open(struct inode *inode, struct file *filp) return -ENOMEM; } + mutex_init(&u->bind_mutex); mutex_init(&u->ring_cons_mutex); filp->private_data = u; -- cgit v0.10.2 From a1ce1be578365a4da7e7d7db4812539d2d5da763 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:50 -0800 Subject: xen: remove suspend_cancel hook Remove suspend_cancel hook from xenbus_driver, in preparation for using the device model for suspending. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 773d1cf..bd20361 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -689,27 +689,6 @@ static int suspend_dev(struct device *dev, void *data) return 0; } -static int suspend_cancel_dev(struct device *dev, void *data) -{ - int err = 0; - struct xenbus_driver *drv; - struct xenbus_device *xdev; - - DPRINTK(""); - - if (dev->driver == NULL) - return 0; - drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); - if (drv->suspend_cancel) - err = drv->suspend_cancel(xdev); - if (err) - printk(KERN_WARNING - "xenbus: suspend_cancel %s failed: %i\n", - dev_name(dev), err); - return 0; -} - static int resume_dev(struct device *dev, void *data) { int err; @@ -777,8 +756,6 @@ EXPORT_SYMBOL_GPL(xenbus_resume); void xenbus_suspend_cancel(void) { xs_suspend_cancel(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); - xenbus_backend_resume(suspend_cancel_dev); } EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index f87f961..0836772 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -92,7 +92,6 @@ struct xenbus_driver { enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); int (*suspend)(struct xenbus_device *dev); - int (*suspend_cancel)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; -- cgit v0.10.2 From de5b31bd47de7e6f41be2e271318dbc8f1af354d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 9 Feb 2009 12:05:50 -0800 Subject: xen: use device model for suspending xenbus devices Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index b703dd2..5269bb4 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -104,9 +104,8 @@ static void do_suspend(void) goto out; } - printk("suspending xenbus...\n"); - /* XXX use normal device tree? */ - xenbus_suspend(); + printk(KERN_DEBUG "suspending xenstore...\n"); + xs_suspend(); err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); if (err) { @@ -116,9 +115,9 @@ static void do_suspend(void) if (!cancelled) { xen_arch_resume(); - xenbus_resume(); + xs_resume(); } else - xenbus_suspend_cancel(); + xs_suspend_cancel(); device_resume(PMSG_RESUME); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index bd20361..4649213 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name); static void xenbus_dev_shutdown(struct device *_dev); +static int xenbus_dev_suspend(struct device *dev, pm_message_t state); +static int xenbus_dev_resume(struct device *dev); + /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) @@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = { .remove = xenbus_dev_remove, .shutdown = xenbus_dev_shutdown, .dev_attrs = xenbus_dev_attrs, + + .suspend = xenbus_dev_suspend, + .resume = xenbus_dev_resume, }, }; @@ -669,7 +675,7 @@ static struct xenbus_watch fe_watch = { .callback = frontend_changed, }; -static int suspend_dev(struct device *dev, void *data) +static int xenbus_dev_suspend(struct device *dev, pm_message_t state) { int err = 0; struct xenbus_driver *drv; @@ -682,14 +688,14 @@ static int suspend_dev(struct device *dev, void *data) drv = to_xenbus_driver(dev->driver); xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) - err = drv->suspend(xdev); + err = drv->suspend(xdev, state); if (err) printk(KERN_WARNING "xenbus: suspend %s failed: %i\n", dev_name(dev), err); return 0; } -static int resume_dev(struct device *dev, void *data) +static int xenbus_dev_resume(struct device *dev) { int err; struct xenbus_driver *drv; @@ -734,31 +740,6 @@ static int resume_dev(struct device *dev, void *data) return 0; } -void xenbus_suspend(void) -{ - DPRINTK(""); - - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); - xenbus_backend_suspend(suspend_dev); - xs_suspend(); -} -EXPORT_SYMBOL_GPL(xenbus_suspend); - -void xenbus_resume(void) -{ - xb_init_comms(); - xs_resume(); - bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); - xenbus_backend_resume(resume_dev); -} -EXPORT_SYMBOL_GPL(xenbus_resume); - -void xenbus_suspend_cancel(void) -{ - xs_suspend_cancel(); -} -EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); - /* A flag to determine if xenstored is 'ready' (i.e. has started) */ int xenstored_ready = 0; diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index e325eab..eab33f1 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -673,6 +673,8 @@ void xs_resume(void) struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; + xb_init_comms(); + mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); up_write(&xs_state.transaction_mutex); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 0836772..b9763ba 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -91,7 +91,7 @@ struct xenbus_driver { void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev); + int (*suspend)(struct xenbus_device *dev, pm_message_t state); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; -- cgit v0.10.2 From c6a960ce8858f20036cc3afc3b9422670d0d9021 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:53 -0800 Subject: xen/xenbus: export xenbus_dev_changed Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4649213..d42e25d 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -660,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) kfree(root); } +EXPORT_SYMBOL_GPL(xenbus_dev_changed); static void frontend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) -- cgit v0.10.2 From cff7e81b3dd7c25cd2248cd7a04c5764552d5d55 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 10 Mar 2009 14:39:59 -0700 Subject: xen: add /sys/hypervisor support Adds support for Xen info under /sys/hypervisor. Taken from Novell 2.6.27 backport tree. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 526187c..88bca1c 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -41,3 +41,13 @@ config XEN_COMPAT_XENFS a xen platform. If in doubt, say yes. +config XEN_SYS_HYPERVISOR + bool "Create xen entries under /sys/hypervisor" + depends on XEN && SYSFS + select SYS_HYPERVISOR + default y + help + Create entries under /sys/hypervisor describing the Xen + hypervisor environment. When running native or in another + virtual environment, /sys/hypervisor will still be present, + but will have no xen contents. \ No newline at end of file diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index ff8accc..f3603a3 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -4,4 +4,5 @@ obj-y += xenbus/ obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XENFS) += xenfs/ \ No newline at end of file +obj-$(CONFIG_XENFS) += xenfs/ +obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c new file mode 100644 index 0000000..cb29d1c --- /dev/null +++ b/drivers/xen/sys-hypervisor.c @@ -0,0 +1,475 @@ +/* + * copyright (c) 2006 IBM Corporation + * Authored by: Mike D. Day + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#define HYPERVISOR_ATTR_RO(_name) \ +static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name) + +#define HYPERVISOR_ATTR_RW(_name) \ +static struct hyp_sysfs_attr _name##_attr = \ + __ATTR(_name, 0644, _name##_show, _name##_store) + +struct hyp_sysfs_attr { + struct attribute attr; + ssize_t (*show)(struct hyp_sysfs_attr *, char *); + ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t); + void *hyp_attr_data; +}; + +static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + return sprintf(buffer, "xen\n"); +} + +HYPERVISOR_ATTR_RO(type); + +static int __init xen_sysfs_type_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &type_attr.attr); +} + +static void xen_sysfs_type_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &type_attr.attr); +} + +/* xen version attributes */ +static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version >> 16); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(major); + +static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int version = HYPERVISOR_xen_version(XENVER_version, NULL); + if (version) + return sprintf(buffer, "%d\n", version & 0xff); + return -ENODEV; +} + +HYPERVISOR_ATTR_RO(minor); + +static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *extra; + + extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL); + if (extra) { + ret = HYPERVISOR_xen_version(XENVER_extraversion, extra); + if (!ret) + ret = sprintf(buffer, "%s\n", extra); + kfree(extra); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(extra); + +static struct attribute *version_attrs[] = { + &major_attr.attr, + &minor_attr.attr, + &extra_attr.attr, + NULL +}; + +static struct attribute_group version_group = { + .name = "version", + .attrs = version_attrs, +}; + +static int __init xen_sysfs_version_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &version_group); +} + +static void xen_sysfs_version_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &version_group); +} + +/* UUID */ + +static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + char *vm, *val; + int ret; + extern int xenstored_ready; + + if (!xenstored_ready) + return -EBUSY; + + vm = xenbus_read(XBT_NIL, "vm", "", NULL); + if (IS_ERR(vm)) + return PTR_ERR(vm); + val = xenbus_read(XBT_NIL, vm, "uuid", NULL); + kfree(vm); + if (IS_ERR(val)) + return PTR_ERR(val); + ret = sprintf(buffer, "%s\n", val); + kfree(val); + return ret; +} + +HYPERVISOR_ATTR_RO(uuid); + +static int __init xen_sysfs_uuid_init(void) +{ + return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr); +} + +static void xen_sysfs_uuid_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr); +} + +/* xen compilation attributes */ + +static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compiler); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiler); + +static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compile_by); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compiled_by); + +static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_compile_info *info; + + info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL); + if (info) { + ret = HYPERVISOR_xen_version(XENVER_compile_info, info); + if (!ret) + ret = sprintf(buffer, "%s\n", info->compile_date); + kfree(info); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(compile_date); + +static struct attribute *xen_compile_attrs[] = { + &compiler_attr.attr, + &compiled_by_attr.attr, + &compile_date_attr.attr, + NULL +}; + +static struct attribute_group xen_compilation_group = { + .name = "compilation", + .attrs = xen_compile_attrs, +}; + +int __init static xen_compilation_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &xen_compilation_group); +} + +static void xen_compilation_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &xen_compilation_group); +} + +/* xen properties info */ + +static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *caps; + + caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL); + if (caps) { + ret = HYPERVISOR_xen_version(XENVER_capabilities, caps); + if (!ret) + ret = sprintf(buffer, "%s\n", caps); + kfree(caps); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(capabilities); + +static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + char *cset; + + cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL); + if (cset) { + ret = HYPERVISOR_xen_version(XENVER_changeset, cset); + if (!ret) + ret = sprintf(buffer, "%s\n", cset); + kfree(cset); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(changeset); + +static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret = -ENOMEM; + struct xen_platform_parameters *parms; + + parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL); + if (parms) { + ret = HYPERVISOR_xen_version(XENVER_platform_parameters, + parms); + if (!ret) + ret = sprintf(buffer, "%lx\n", parms->virt_start); + kfree(parms); + } + + return ret; +} + +HYPERVISOR_ATTR_RO(virtual_start); + +static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + int ret; + + ret = HYPERVISOR_xen_version(XENVER_pagesize, NULL); + if (ret > 0) + ret = sprintf(buffer, "%x\n", ret); + + return ret; +} + +HYPERVISOR_ATTR_RO(pagesize); + +/* eventually there will be several more features to export */ +static ssize_t xen_feature_show(int index, char *buffer) +{ + int ret = -ENOMEM; + struct xen_feature_info *info; + + info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL); + if (info) { + info->submap_idx = index; + ret = HYPERVISOR_xen_version(XENVER_get_features, info); + if (!ret) + ret = sprintf(buffer, "%d\n", info->submap); + kfree(info); + } + + return ret; +} + +static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer) +{ + return xen_feature_show(XENFEAT_writable_page_tables, buffer); +} + +HYPERVISOR_ATTR_RO(writable_pt); + +static struct attribute *xen_properties_attrs[] = { + &capabilities_attr.attr, + &changeset_attr.attr, + &virtual_start_attr.attr, + &pagesize_attr.attr, + &writable_pt_attr.attr, + NULL +}; + +static struct attribute_group xen_properties_group = { + .name = "properties", + .attrs = xen_properties_attrs, +}; + +static int __init xen_properties_init(void) +{ + return sysfs_create_group(hypervisor_kobj, &xen_properties_group); +} + +static void xen_properties_destroy(void) +{ + sysfs_remove_group(hypervisor_kobj, &xen_properties_group); +} + +#ifdef CONFIG_KEXEC + +extern size_t vmcoreinfo_size_xen; +extern unsigned long paddr_vmcoreinfo_xen; + +static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page) +{ + return sprintf(page, "%lx %zx\n", + paddr_vmcoreinfo_xen, vmcoreinfo_size_xen); +} + +HYPERVISOR_ATTR_RO(vmcoreinfo); + +static int __init xen_sysfs_vmcoreinfo_init(void) +{ + return sysfs_create_file(hypervisor_kobj, + &vmcoreinfo_attr.attr); +} + +static void xen_sysfs_vmcoreinfo_destroy(void) +{ + sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr); +} + +#endif + +static int __init hyper_sysfs_init(void) +{ + int ret; + + if (!xen_domain()) + return -ENODEV; + + ret = xen_sysfs_type_init(); + if (ret) + goto out; + ret = xen_sysfs_version_init(); + if (ret) + goto version_out; + ret = xen_compilation_init(); + if (ret) + goto comp_out; + ret = xen_sysfs_uuid_init(); + if (ret) + goto uuid_out; + ret = xen_properties_init(); + if (ret) + goto prop_out; +#ifdef CONFIG_KEXEC + if (vmcoreinfo_size_xen != 0) { + ret = xen_sysfs_vmcoreinfo_init(); + if (ret) + goto vmcoreinfo_out; + } +#endif + + goto out; + +#ifdef CONFIG_KEXEC +vmcoreinfo_out: +#endif + xen_properties_destroy(); +prop_out: + xen_sysfs_uuid_destroy(); +uuid_out: + xen_compilation_destroy(); +comp_out: + xen_sysfs_version_destroy(); +version_out: + xen_sysfs_type_destroy(); +out: + return ret; +} + +static void __exit hyper_sysfs_exit(void) +{ +#ifdef CONFIG_KEXEC + if (vmcoreinfo_size_xen != 0) + xen_sysfs_vmcoreinfo_destroy(); +#endif + xen_properties_destroy(); + xen_compilation_destroy(); + xen_sysfs_uuid_destroy(); + xen_sysfs_version_destroy(); + xen_sysfs_type_destroy(); + +} +module_init(hyper_sysfs_init); +module_exit(hyper_sysfs_exit); + +static ssize_t hyp_sysfs_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->show) + return hyp_attr->show(hyp_attr, buffer); + return 0; +} + +static ssize_t hyp_sysfs_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t len) +{ + struct hyp_sysfs_attr *hyp_attr; + hyp_attr = container_of(attr, struct hyp_sysfs_attr, attr); + if (hyp_attr->store) + return hyp_attr->store(hyp_attr, buffer, len); + return 0; +} + +static struct sysfs_ops hyp_sysfs_ops = { + .show = hyp_sysfs_show, + .store = hyp_sysfs_store, +}; + +static struct kobj_type hyp_sysfs_kobj_type = { + .sysfs_ops = &hyp_sysfs_ops, +}; + +static int __init hypervisor_subsys_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + hypervisor_kobj->ktype = &hyp_sysfs_kobj_type; + return 0; +} +device_initcall(hypervisor_subsys_init); diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index 453235e..e8b6519 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -57,4 +57,7 @@ struct xen_feature_info { /* Declares the features reported by XENVER_get_features. */ #include "features.h" +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + #endif /* __XEN_PUBLIC_VERSION_H__ */ -- cgit v0.10.2 From a649b720614d5675dc402bef75a92576143fede7 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 10 Mar 2009 17:17:41 -0700 Subject: xen/sys/hypervisor: change writable_pt to features /sys/hypervisor/properties/writable_pt was misnamed. Rename to features, expressed as a bit array in hex. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index cb29d1c..1267d6f 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -293,37 +293,48 @@ static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer) HYPERVISOR_ATTR_RO(pagesize); -/* eventually there will be several more features to export */ static ssize_t xen_feature_show(int index, char *buffer) { - int ret = -ENOMEM; - struct xen_feature_info *info; + ssize_t ret; + struct xen_feature_info info; - info = kmalloc(sizeof(struct xen_feature_info), GFP_KERNEL); - if (info) { - info->submap_idx = index; - ret = HYPERVISOR_xen_version(XENVER_get_features, info); - if (!ret) - ret = sprintf(buffer, "%d\n", info->submap); - kfree(info); - } + info.submap_idx = index; + ret = HYPERVISOR_xen_version(XENVER_get_features, &info); + if (!ret) + ret = sprintf(buffer, "%08x", info.submap); return ret; } -static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer) +static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer) { - return xen_feature_show(XENFEAT_writable_page_tables, buffer); + ssize_t len; + int i; + + len = 0; + for (i = XENFEAT_NR_SUBMAPS-1; i >= 0; i--) { + int ret = xen_feature_show(i, buffer + len); + if (ret < 0) { + if (len == 0) + len = ret; + break; + } + len += ret; + } + if (len > 0) + buffer[len++] = '\n'; + + return len; } -HYPERVISOR_ATTR_RO(writable_pt); +HYPERVISOR_ATTR_RO(features); static struct attribute *xen_properties_attrs[] = { &capabilities_attr.attr, &changeset_attr.attr, &virtual_start_attr.attr, &pagesize_attr.attr, - &writable_pt_attr.attr, + &features_attr.attr, NULL }; -- cgit v0.10.2 From f0783708bf63a2827863cf2be57c08a24843e6bd Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 11 Mar 2009 10:19:54 +0000 Subject: xen: drop kexec bits from /sys/hypervisor since kexec isn't implemented yet I needed this to compile since there is no kexec yet in pvops kernel CC drivers/xen/sys-hypervisor.o drivers/xen/sys-hypervisor.c: In function 'hyper_sysfs_init': drivers/xen/sys-hypervisor.c:405: error: 'vmcoreinfo_size_xen' undeclared (first use in this function) drivers/xen/sys-hypervisor.c:405: error: (Each undeclared identifier is reported only once drivers/xen/sys-hypervisor.c:405: error: for each function it appears in.) drivers/xen/sys-hypervisor.c:406: error: implicit declaration of function 'xen_sysfs_vmcoreinfo_init' drivers/xen/sys-hypervisor.c: In function 'hyper_sysfs_exit': drivers/xen/sys-hypervisor.c:433: error: 'vmcoreinfo_size_xen' undeclared (first use in this function) drivers/xen/sys-hypervisor.c:434: error: implicit declaration of function 'xen_sysfs_vmcoreinfo_destroy' Signed-off-by: Ian Campbell diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c index 1267d6f..88a60e0 100644 --- a/drivers/xen/sys-hypervisor.c +++ b/drivers/xen/sys-hypervisor.c @@ -353,32 +353,6 @@ static void xen_properties_destroy(void) sysfs_remove_group(hypervisor_kobj, &xen_properties_group); } -#ifdef CONFIG_KEXEC - -extern size_t vmcoreinfo_size_xen; -extern unsigned long paddr_vmcoreinfo_xen; - -static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page) -{ - return sprintf(page, "%lx %zx\n", - paddr_vmcoreinfo_xen, vmcoreinfo_size_xen); -} - -HYPERVISOR_ATTR_RO(vmcoreinfo); - -static int __init xen_sysfs_vmcoreinfo_init(void) -{ - return sysfs_create_file(hypervisor_kobj, - &vmcoreinfo_attr.attr); -} - -static void xen_sysfs_vmcoreinfo_destroy(void) -{ - sysfs_remove_file(hypervisor_kobj, &vmcoreinfo_attr.attr); -} - -#endif - static int __init hyper_sysfs_init(void) { int ret; @@ -401,20 +375,9 @@ static int __init hyper_sysfs_init(void) ret = xen_properties_init(); if (ret) goto prop_out; -#ifdef CONFIG_KEXEC - if (vmcoreinfo_size_xen != 0) { - ret = xen_sysfs_vmcoreinfo_init(); - if (ret) - goto vmcoreinfo_out; - } -#endif goto out; -#ifdef CONFIG_KEXEC -vmcoreinfo_out: -#endif - xen_properties_destroy(); prop_out: xen_sysfs_uuid_destroy(); uuid_out: @@ -429,10 +392,6 @@ out: static void __exit hyper_sysfs_exit(void) { -#ifdef CONFIG_KEXEC - if (vmcoreinfo_size_xen != 0) - xen_sysfs_vmcoreinfo_destroy(); -#endif xen_properties_destroy(); xen_compilation_destroy(); xen_sysfs_uuid_destroy(); -- cgit v0.10.2 From 818fd20673df82031e604bb784d836f1fc2e2451 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Feb 2009 18:46:47 -0800 Subject: xen: add "capabilities" file The xenfs capabilities file allows usermode to determine what capabilities the domain has. The only one at present is "control_d" in a privileged domain. Signed-off-by: Jeremy Fitzhardinge diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 515741a..6559e0c 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -20,10 +20,27 @@ MODULE_DESCRIPTION("Xen filesystem"); MODULE_LICENSE("GPL"); +static ssize_t capabilities_read(struct file *file, char __user *buf, + size_t size, loff_t *off) +{ + char *tmp = ""; + + if (xen_initial_domain()) + tmp = "control_d\n"; + + return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp)); +} + +static const struct file_operations capabilities_file_ops = { + .read = capabilities_read, +}; + static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr xenfs_files[] = { - [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR}, + [1] = {}, + { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, + { "capabilities", &capabilities_file_ops, S_IRUGO }, {""}, }; -- cgit v0.10.2 From 8a6f83afd0c5355db6d11394a798e94950306239 Mon Sep 17 00:00:00 2001 From: KaiGai Kohei Date: Wed, 1 Apr 2009 10:07:57 +0900 Subject: Permissive domain in userspace object manager This patch enables applications to handle permissive domain correctly. Since the v2.6.26 kernel, SELinux has supported an idea of permissive domain which allows certain processes to work as if permissive mode, even if the global setting is enforcing mode. However, we don't have an application program interface to inform what domains are permissive one, and what domains are not. It means applications focuses on SELinux (XACE/SELinux, SE-PostgreSQL and so on) cannot handle permissive domain correctly. This patch add the sixth field (flags) on the reply of the /selinux/access interface which is used to make an access control decision from userspace. If the first bit of the flags field is positive, it means the required access control decision is on permissive domain, so application should allow any required actions, as the kernel doing. This patch also has a side benefit. The av_decision.flags is set at context_struct_compute_av(). It enables to check required permissions without read_lock(&policy_rwlock). Signed-off-by: KaiGai Kohei Acked-by: Stephen Smalley Acked-by: Eric Paris -- security/selinux/avc.c | 2 +- security/selinux/include/security.h | 4 +++- security/selinux/selinuxfs.c | 4 ++-- security/selinux/ss/services.c | 30 +++++------------------------- 4 files changed, 11 insertions(+), 29 deletions(-) Signed-off-by: James Morris diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 7f9b5fa..b2ab608 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -927,7 +927,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, if (denied) { if (flags & AVC_STRICT) rc = -EACCES; - else if (!selinux_enforcing || security_permissive_sid(ssid)) + else if (!selinux_enforcing || (avd->flags & AVD_FLAGS_PERMISSIVE)) avc_update_node(AVC_CALLBACK_GRANT, requested, ssid, tsid, tclass, avd->seqno); else diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 5c3434f..a7be3f0 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -91,9 +91,11 @@ struct av_decision { u32 auditallow; u32 auditdeny; u32 seqno; + u32 flags; }; -int security_permissive_sid(u32 sid); +/* definitions of av_decision.flags */ +#define AVD_FLAGS_PERMISSIVE 0x0001 int security_compute_av(u32 ssid, u32 tsid, u16 tclass, u32 requested, diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 2d5136e..8d4007f 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -527,10 +527,10 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) goto out2; length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, - "%x %x %x %x %u", + "%x %x %x %x %u %x", avd.allowed, 0xffffffff, avd.auditallow, avd.auditdeny, - avd.seqno); + avd.seqno, avd.flags); out2: kfree(tcon); out: diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index deeec6c..500e6f7 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -410,6 +410,7 @@ static int context_struct_compute_av(struct context *scontext, avd->auditallow = 0; avd->auditdeny = 0xffffffff; avd->seqno = latest_granting; + avd->flags = 0; /* * Check for all the invalid cases. @@ -528,31 +529,6 @@ inval_class: return 0; } -/* - * Given a sid find if the type has the permissive flag set - */ -int security_permissive_sid(u32 sid) -{ - struct context *context; - u32 type; - int rc; - - read_lock(&policy_rwlock); - - context = sidtab_search(&sidtab, sid); - BUG_ON(!context); - - type = context->type; - /* - * we are intentionally using type here, not type-1, the 0th bit may - * someday indicate that we are globally setting permissive in policy. - */ - rc = ebitmap_get_bit(&policydb.permissive_map, type); - - read_unlock(&policy_rwlock); - return rc; -} - static int security_validtrans_handle_fail(struct context *ocontext, struct context *ncontext, struct context *tcontext, @@ -767,6 +743,10 @@ int security_compute_av(u32 ssid, rc = context_struct_compute_av(scontext, tcontext, tclass, requested, avd); + + /* permissive domain? */ + if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + avd->flags |= AVD_FLAGS_PERMISSIVE; out: read_unlock(&policy_rwlock); return rc; -- cgit v0.10.2 From 53152f957d4a5dfd537d17c823afeb1a2c03753e Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 2 Apr 2009 13:24:28 +0100 Subject: xen: honour VCPU availability on boot If a VM is booted with offline VCPUs then unplug them during boot. Determining the availability of a VCPU requires access to XenStore which is not available at the point smp_prepare_cpus() is called, therefore we bring up all VCPUS initially and unplug the offline ones as soon as XenStore becomes available. Signed-off-by: Ian Campbell diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index 974f56d..411cb1f 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -21,29 +21,41 @@ static void disable_hotplug_cpu(int cpu) cpu_clear(cpu, cpu_present_map); } -static void vcpu_hotplug(unsigned int cpu) +static int vcpu_online(unsigned int cpu) { int err; char dir[32], state[32]; - if (!cpu_possible(cpu)) - return; - sprintf(dir, "cpu/%u", cpu); err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); if (err != 1) { printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); - return; + return err; } - if (strcmp(state, "online") == 0) { + if (strcmp(state, "online") == 0) + return 1; + else if (strcmp(state, "offline") == 0) + return 0; + + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu); + return -EINVAL; +} +static void vcpu_hotplug(unsigned int cpu) +{ + if (!cpu_possible(cpu)) + return; + + switch (vcpu_online(cpu)) { + case 1: enable_hotplug_cpu(cpu); - } else if (strcmp(state, "offline") == 0) { + break; + case 0: (void)cpu_down(cpu); disable_hotplug_cpu(cpu); - } else { - printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", - state, cpu); + break; + default: + break; } } @@ -64,12 +76,20 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, static int setup_cpu_watcher(struct notifier_block *notifier, unsigned long event, void *data) { + int cpu; static struct xenbus_watch cpu_watch = { .node = "cpu", .callback = handle_vcpu_hotplug_event}; (void)register_xenbus_watch(&cpu_watch); + for_each_possible_cpu(cpu) { + if (vcpu_online(cpu) == 0) { + (void)cpu_down(cpu); + cpu_clear(cpu, cpu_present_map); + } + } + return NOTIFY_DONE; } -- cgit v0.10.2 From 3d43321b7015387cfebbe26436d0e9d299162ea1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 2 Apr 2009 15:49:29 -0700 Subject: modules: sysctl to block module loading Implement a sysctl file that disables module-loading system-wide since there is no longer a viable way to remove CAP_SYS_MODULE after the system bounding capability set was removed in 2.6.25. Value can only be set to "1", and is tested only if standard capability checks allow CAP_SYS_MODULE. Given existing /dev/mem protections, this should allow administrators a one-way method to block module loading after initial boot-time module loading has finished. Signed-off-by: Kees Cook Acked-by: Serge Hallyn Signed-off-by: James Morris diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index a4ccdd1..02b1349 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -30,6 +30,7 @@ show up in /proc/sys/kernel: - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/debugging-modules.txt +- modules_disabled - msgmax - msgmnb - msgmni @@ -179,6 +180,16 @@ kernel stack. ============================================================== +modules_disabled: + +A toggle value indicating if modules are allowed to be loaded +in an otherwise modular kernel. This toggle defaults to off +(0), but can be set true (1). Once true, modules can be +neither loaded nor unloaded, and the toggle cannot be set back +to false. + +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/kernel/module.c b/kernel/module.c index f77ac32..eeb3f7b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -778,6 +778,9 @@ static void wait_for_zero_refcount(struct module *mod) mutex_lock(&module_mutex); } +/* Block module loading/unloading? */ +int modules_disabled = 0; + SYSCALL_DEFINE2(delete_module, const char __user *, name_user, unsigned int, flags) { @@ -785,7 +788,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, char name[MODULE_NAME_LEN]; int ret, forced = 0; - if (!capable(CAP_SYS_MODULE)) + if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) @@ -2349,7 +2352,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, int ret = 0; /* Must have permission */ - if (!capable(CAP_SYS_MODULE)) + if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; /* Only one module load at a time, please */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c5ef44f..2fb4246 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -113,6 +113,7 @@ static int ngroups_max = NGROUPS_MAX; #ifdef CONFIG_MODULES extern char modprobe_path[]; +extern int modules_disabled; #endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; @@ -533,6 +534,17 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "modules_disabled", + .data = &modules_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = &proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, #endif #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) { -- cgit v0.10.2 From b5f22a59c0356655a501190959db9f7f5dd07e3f Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Thu, 2 Apr 2009 18:47:14 -0500 Subject: don't raise all privs on setuid-root file with fE set (v2) Distributions face a backward compatibility problem with starting to use file capabilities. For instance, removing setuid root from ping and doing setcap cap_net_raw=pe means that booting with an older kernel or one compiled without file capabilities means ping won't work for non-root users. In order to replace the setuid root bit on a capability-unaware program, one has to set the effective, or legacy, file capability, which makes the capability effective immediately. This patch uses the legacy bit as a queue to not automatically add full privilege to a setuid-root program. So, with this patch, an ordinary setuid-root program will run with privilege. But if /bin/ping has both setuid-root and cap_net_raw in fP and fE, then ping (when run by non-root user) will not run with only cap_net_raw. Changelog: Apr 2 2009: Print a message once when such a binary is loaded, as per James Morris' suggestion. Apr 2 2009: Fix the condition to only catch uid!=0 && euid==0. Signed-off-by: Serge E. Hallyn Acked-by: Casey Schaufler Signed-off-by: James Morris diff --git a/security/commoncap.c b/security/commoncap.c index 7cd61a5..97ac1f1 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -28,6 +28,28 @@ #include #include +/* + * If a non-root user executes a setuid-root binary in + * !secure(SECURE_NOROOT) mode, then we raise capabilities. + * However if fE is also set, then the intent is for only + * the file capabilities to be applied, and the setuid-root + * bit is left on either to change the uid (plausible) or + * to get full privilege on a kernel without file capabilities + * support. So in that case we do not raise capabilities. + * + * Warn if that happens, once per boot. + */ +static void warn_setuid_and_fcaps_mixed(char *fname) +{ + static int warned; + if (!warned) { + printk(KERN_INFO "warning: `%s' has both setuid-root and" + " effective capabilities. Therefore not raising all" + " capabilities.\n", fname); + warned = 1; + } +} + int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { NETLINK_CB(skb).eff_cap = current_cap(); @@ -464,6 +486,15 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) if (!issecure(SECURE_NOROOT)) { /* + * If the legacy file capability is set, then don't set privs + * for a setuid root binary run by a non-root user. Do set it + * for a root user just to cause least surprise to an admin. + */ + if (effective && new->uid != 0 && new->euid == 0) { + warn_setuid_and_fcaps_mixed(bprm->filename); + goto skip; + } + /* * To support inheritance of root-permissions and suid-root * executables under compatibility mode, we override the * capability sets for the file. @@ -478,6 +509,7 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) if (new->euid == 0) effective = true; } +skip: /* Don't let someone trace a set[ug]id/setpcap binary with the revised * credentials unless they have the appropriate permit -- cgit v0.10.2 From 31c9a24ec82926fcae49483e53566d231e705057 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Apr 2009 21:06:25 -0700 Subject: RCU: make treercu be default Impact: switch default config from CLASSIC_RCU to TREE_RCU Given that I have not gotten any complaints or bug reports on treercu recently, this patch makes it be the default. There are a number of other defconfig files that explicitly call out CLASSIC_RCU, but which have comment headers saying not to edit them. Probably holdovers from one of the flavors of "make config", but... Signed-off-by: Paul E. McKenney Cc: akpm@linux-foundation.org Cc: dipankar@in.ibm.com Cc: niv@us.ibm.com Cc: manfred@colorfullife.com Cc: peterz@infradead.org LKML-Reference: <20090403040625.GA9473@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/init/Kconfig b/init/Kconfig index 14c483d..26ac877 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -302,7 +302,7 @@ menu "RCU Subsystem" choice prompt "RCU Implementation" - default CLASSIC_RCU + default TREE_RCU config CLASSIC_RCU bool "Classic RCU" -- cgit v0.10.2 From 4b166da939012905f4c36fedada62067db31948e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Sat, 28 Mar 2009 19:47:01 +0100 Subject: ASoC: Add driver for s6000 I2S interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a driver for the I2S interface found on Stretch s6000 family processors. Signed-off-by: Daniel Glöckner Signed-off-by: Mark Brown diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index 3d2bb6f..3304f9d 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -32,6 +32,7 @@ source "sound/soc/fsl/Kconfig" source "sound/soc/omap/Kconfig" source "sound/soc/pxa/Kconfig" source "sound/soc/s3c24xx/Kconfig" +source "sound/soc/s6000/Kconfig" source "sound/soc/sh/Kconfig" # Supported codecs diff --git a/sound/soc/Makefile b/sound/soc/Makefile index 0237879..8943a14 100644 --- a/sound/soc/Makefile +++ b/sound/soc/Makefile @@ -10,4 +10,5 @@ obj-$(CONFIG_SND_SOC) += fsl/ obj-$(CONFIG_SND_SOC) += omap/ obj-$(CONFIG_SND_SOC) += pxa/ obj-$(CONFIG_SND_SOC) += s3c24xx/ +obj-$(CONFIG_SND_SOC) += s6000/ obj-$(CONFIG_SND_SOC) += sh/ diff --git a/sound/soc/s6000/Kconfig b/sound/soc/s6000/Kconfig new file mode 100644 index 0000000..4bfc8bc --- /dev/null +++ b/sound/soc/s6000/Kconfig @@ -0,0 +1,10 @@ +config SND_S6000_SOC + tristate "SoC Audio for the Stretch s6000 family" + depends on XTENSA_VARIANT_S6000 + help + Say Y or M if you want to add support for codecs attached to + s6000 family chips. You will also need to select the platform + to support below. + +config SND_S6000_SOC_I2S + tristate diff --git a/sound/soc/s6000/Makefile b/sound/soc/s6000/Makefile new file mode 100644 index 0000000..df15f87 --- /dev/null +++ b/sound/soc/s6000/Makefile @@ -0,0 +1,6 @@ +# s6000 Platform Support +snd-soc-s6000-objs := s6000-pcm.o +snd-soc-s6000-i2s-objs := s6000-i2s.o + +obj-$(CONFIG_SND_S6000_SOC) += snd-soc-s6000.o +obj-$(CONFIG_SND_S6000_SOC_I2S) += snd-soc-s6000-i2s.o diff --git a/sound/soc/s6000/s6000-i2s.c b/sound/soc/s6000/s6000-i2s.c new file mode 100644 index 0000000..dcc7904 --- /dev/null +++ b/sound/soc/s6000/s6000-i2s.c @@ -0,0 +1,629 @@ +/* + * ALSA SoC I2S Audio Layer for the Stretch S6000 family + * + * Author: Daniel Gloeckner, + * Copyright: (C) 2009 emlix GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "s6000-i2s.h" +#include "s6000-pcm.h" + +struct s6000_i2s_dev { + dma_addr_t sifbase; + u8 __iomem *scbbase; + unsigned int wide; + unsigned int channel_in; + unsigned int channel_out; + unsigned int lines_in; + unsigned int lines_out; + struct s6000_pcm_dma_params dma_params; +}; + +#define S6_I2S_INTERRUPT_STATUS 0x00 +#define S6_I2S_INT_OVERRUN 1 +#define S6_I2S_INT_UNDERRUN 2 +#define S6_I2S_INT_ALIGNMENT 4 +#define S6_I2S_INTERRUPT_ENABLE 0x04 +#define S6_I2S_INTERRUPT_RAW 0x08 +#define S6_I2S_INTERRUPT_CLEAR 0x0C +#define S6_I2S_INTERRUPT_SET 0x10 +#define S6_I2S_MODE 0x20 +#define S6_I2S_DUAL 0 +#define S6_I2S_WIDE 1 +#define S6_I2S_TX_DEFAULT 0x24 +#define S6_I2S_DATA_CFG(c) (0x40 + 0x10 * (c)) +#define S6_I2S_IN 0 +#define S6_I2S_OUT 1 +#define S6_I2S_UNUSED 2 +#define S6_I2S_INTERFACE_CFG(c) (0x44 + 0x10 * (c)) +#define S6_I2S_DIV_MASK 0x001fff +#define S6_I2S_16BIT 0x000000 +#define S6_I2S_20BIT 0x002000 +#define S6_I2S_24BIT 0x004000 +#define S6_I2S_32BIT 0x006000 +#define S6_I2S_BITS_MASK 0x006000 +#define S6_I2S_MEM_16BIT 0x000000 +#define S6_I2S_MEM_32BIT 0x008000 +#define S6_I2S_MEM_MASK 0x008000 +#define S6_I2S_CHANNELS_SHIFT 16 +#define S6_I2S_CHANNELS_MASK 0x030000 +#define S6_I2S_SCK_IN 0x000000 +#define S6_I2S_SCK_OUT 0x040000 +#define S6_I2S_SCK_DIR 0x040000 +#define S6_I2S_WS_IN 0x000000 +#define S6_I2S_WS_OUT 0x080000 +#define S6_I2S_WS_DIR 0x080000 +#define S6_I2S_LEFT_FIRST 0x000000 +#define S6_I2S_RIGHT_FIRST 0x100000 +#define S6_I2S_FIRST 0x100000 +#define S6_I2S_CUR_SCK 0x200000 +#define S6_I2S_CUR_WS 0x400000 +#define S6_I2S_ENABLE(c) (0x48 + 0x10 * (c)) +#define S6_I2S_DISABLE_IF 0x02 +#define S6_I2S_ENABLE_IF 0x03 +#define S6_I2S_IS_BUSY 0x04 +#define S6_I2S_DMA_ACTIVE 0x08 +#define S6_I2S_IS_ENABLED 0x10 + +#define S6_I2S_NUM_LINES 4 + +#define S6_I2S_SIF_PORT0 0x0000000 +#define S6_I2S_SIF_PORT1 0x0000080 /* docs say 0x0000010 */ + +static inline void s6_i2s_write_reg(struct s6000_i2s_dev *dev, int reg, u32 val) +{ + writel(val, dev->scbbase + reg); +} + +static inline u32 s6_i2s_read_reg(struct s6000_i2s_dev *dev, int reg) +{ + return readl(dev->scbbase + reg); +} + +static inline void s6_i2s_mod_reg(struct s6000_i2s_dev *dev, int reg, + u32 mask, u32 val) +{ + val ^= s6_i2s_read_reg(dev, reg) & ~mask; + s6_i2s_write_reg(dev, reg, val); +} + +static void s6000_i2s_start_channel(struct s6000_i2s_dev *dev, int channel) +{ + int i, j, cur, prev; + + /* + * Wait for WCLK to toggle 5 times before enabling the channel + * s6000 Family Datasheet 3.6.4: + * "At least two cycles of WS must occur between commands + * to disable or enable the interface" + */ + j = 0; + prev = ~S6_I2S_CUR_WS; + for (i = 1000000; --i && j < 6; ) { + cur = s6_i2s_read_reg(dev, S6_I2S_INTERFACE_CFG(channel)) + & S6_I2S_CUR_WS; + if (prev != cur) { + prev = cur; + j++; + } + } + if (j < 6) + printk(KERN_WARNING "s6000-i2s: timeout waiting for WCLK\n"); + + s6_i2s_write_reg(dev, S6_I2S_ENABLE(channel), S6_I2S_ENABLE_IF); +} + +static void s6000_i2s_stop_channel(struct s6000_i2s_dev *dev, int channel) +{ + s6_i2s_write_reg(dev, S6_I2S_ENABLE(channel), S6_I2S_DISABLE_IF); +} + +static void s6000_i2s_start(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct s6000_i2s_dev *dev = rtd->dai->cpu_dai->private_data; + int channel; + + channel = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? + dev->channel_out : dev->channel_in; + + s6000_i2s_start_channel(dev, channel); +} + +static void s6000_i2s_stop(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct s6000_i2s_dev *dev = rtd->dai->cpu_dai->private_data; + int channel; + + channel = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) ? + dev->channel_out : dev->channel_in; + + s6000_i2s_stop_channel(dev, channel); +} + +static int s6000_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + int after) +{ + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + if ((substream->stream == SNDRV_PCM_STREAM_CAPTURE) ^ !after) + s6000_i2s_start(substream); + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + if (!after) + s6000_i2s_stop(substream); + } + return 0; +} + +static unsigned int s6000_i2s_int_sources(struct s6000_i2s_dev *dev) +{ + unsigned int pending; + pending = s6_i2s_read_reg(dev, S6_I2S_INTERRUPT_RAW); + pending &= S6_I2S_INT_ALIGNMENT | + S6_I2S_INT_UNDERRUN | + S6_I2S_INT_OVERRUN; + s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_CLEAR, pending); + + return pending; +} + +static unsigned int s6000_i2s_check_xrun(struct snd_soc_dai *cpu_dai) +{ + struct s6000_i2s_dev *dev = cpu_dai->private_data; + unsigned int errors; + unsigned int ret; + + errors = s6000_i2s_int_sources(dev); + if (likely(!errors)) + return 0; + + ret = 0; + if (errors & S6_I2S_INT_ALIGNMENT) + printk(KERN_ERR "s6000-i2s: WCLK misaligned\n"); + if (errors & S6_I2S_INT_UNDERRUN) + ret |= 1 << SNDRV_PCM_STREAM_PLAYBACK; + if (errors & S6_I2S_INT_OVERRUN) + ret |= 1 << SNDRV_PCM_STREAM_CAPTURE; + return ret; +} + +static void s6000_i2s_wait_disabled(struct s6000_i2s_dev *dev) +{ + int channel; + int n = 50; + for (channel = 0; channel < 2; channel++) { + while (--n >= 0) { + int v = s6_i2s_read_reg(dev, S6_I2S_ENABLE(channel)); + if ((v & S6_I2S_IS_ENABLED) + || !(v & (S6_I2S_DMA_ACTIVE | S6_I2S_IS_BUSY))) + break; + udelay(20); + } + } + if (n < 0) + printk(KERN_WARNING "s6000-i2s: timeout disabling interfaces"); +} + +static int s6000_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, + unsigned int fmt) +{ + struct s6000_i2s_dev *dev = cpu_dai->private_data; + u32 w; + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + w = S6_I2S_SCK_IN | S6_I2S_WS_IN; + break; + case SND_SOC_DAIFMT_CBS_CFM: + w = S6_I2S_SCK_OUT | S6_I2S_WS_IN; + break; + case SND_SOC_DAIFMT_CBM_CFS: + w = S6_I2S_SCK_IN | S6_I2S_WS_OUT; + break; + case SND_SOC_DAIFMT_CBS_CFS: + w = S6_I2S_SCK_OUT | S6_I2S_WS_OUT; + break; + default: + return -EINVAL; + } + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_IB_IF: + w |= S6_I2S_LEFT_FIRST; + break; + case SND_SOC_DAIFMT_IB_NF: + w |= S6_I2S_RIGHT_FIRST; + break; + default: + return -EINVAL; + } + + s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(0), + S6_I2S_FIRST | S6_I2S_WS_DIR | S6_I2S_SCK_DIR, w); + s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(1), + S6_I2S_FIRST | S6_I2S_WS_DIR | S6_I2S_SCK_DIR, w); + + return 0; +} + +static int s6000_i2s_set_clkdiv(struct snd_soc_dai *dai, int div_id, int div) +{ + struct s6000_i2s_dev *dev = dai->private_data; + + if (!div || (div & 1) || div > (S6_I2S_DIV_MASK + 1) * 2) + return -EINVAL; + + s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(div_id), + S6_I2S_DIV_MASK, div / 2 - 1); + return 0; +} + +static int s6000_i2s_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct s6000_i2s_dev *dev = dai->private_data; + int interf; + u32 w = 0; + + if (dev->wide) + interf = 0; + else { + w |= (((params_channels(params) - 2) / 2) + << S6_I2S_CHANNELS_SHIFT) & S6_I2S_CHANNELS_MASK; + interf = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + ? dev->channel_out : dev->channel_in; + } + + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + w |= S6_I2S_16BIT | S6_I2S_MEM_16BIT; + break; + case SNDRV_PCM_FORMAT_S32_LE: + w |= S6_I2S_32BIT | S6_I2S_MEM_32BIT; + break; + default: + printk(KERN_WARNING "s6000-i2s: unsupported PCM format %x\n", + params_format(params)); + return -EINVAL; + } + + if (s6_i2s_read_reg(dev, S6_I2S_INTERFACE_CFG(interf)) + & S6_I2S_IS_ENABLED) { + printk(KERN_ERR "s6000-i2s: interface already enabled\n"); + return -EBUSY; + } + + s6_i2s_mod_reg(dev, S6_I2S_INTERFACE_CFG(interf), + S6_I2S_CHANNELS_MASK|S6_I2S_MEM_MASK|S6_I2S_BITS_MASK, + w); + + return 0; +} + +static int s6000_i2s_dai_probe(struct platform_device *pdev, + struct snd_soc_dai *dai) +{ + struct s6000_i2s_dev *dev = dai->private_data; + struct s6000_snd_platform_data *pdata = pdev->dev.platform_data; + + if (!pdata) + return -EINVAL; + + dev->wide = pdata->wide; + dev->channel_in = pdata->channel_in; + dev->channel_out = pdata->channel_out; + dev->lines_in = pdata->lines_in; + dev->lines_out = pdata->lines_out; + + s6_i2s_write_reg(dev, S6_I2S_MODE, + dev->wide ? S6_I2S_WIDE : S6_I2S_DUAL); + + if (dev->wide) { + int i; + + if (dev->lines_in + dev->lines_out > S6_I2S_NUM_LINES) + return -EINVAL; + + dev->channel_in = 0; + dev->channel_out = 1; + dai->capture.channels_min = 2 * dev->lines_in; + dai->capture.channels_max = dai->capture.channels_min; + dai->playback.channels_min = 2 * dev->lines_out; + dai->playback.channels_max = dai->playback.channels_min; + + for (i = 0; i < dev->lines_out; i++) + s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i), S6_I2S_OUT); + + for (; i < S6_I2S_NUM_LINES - dev->lines_in; i++) + s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i), + S6_I2S_UNUSED); + + for (; i < S6_I2S_NUM_LINES; i++) + s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(i), S6_I2S_IN); + } else { + unsigned int cfg[2] = {S6_I2S_UNUSED, S6_I2S_UNUSED}; + + if (dev->lines_in > 1 || dev->lines_out > 1) + return -EINVAL; + + dai->capture.channels_min = 2 * dev->lines_in; + dai->capture.channels_max = 8 * dev->lines_in; + dai->playback.channels_min = 2 * dev->lines_out; + dai->playback.channels_max = 8 * dev->lines_out; + + if (dev->lines_in) + cfg[dev->channel_in] = S6_I2S_IN; + if (dev->lines_out) + cfg[dev->channel_out] = S6_I2S_OUT; + + s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(0), cfg[0]); + s6_i2s_write_reg(dev, S6_I2S_DATA_CFG(1), cfg[1]); + } + + if (dev->lines_out) { + if (dev->lines_in) { + if (!dev->dma_params.dma_out) + return -ENODEV; + } else { + dev->dma_params.dma_out = dev->dma_params.dma_in; + dev->dma_params.dma_in = 0; + } + } + dev->dma_params.sif_in = dev->sifbase + (dev->channel_in ? + S6_I2S_SIF_PORT1 : S6_I2S_SIF_PORT0); + dev->dma_params.sif_out = dev->sifbase + (dev->channel_out ? + S6_I2S_SIF_PORT1 : S6_I2S_SIF_PORT0); + dev->dma_params.same_rate = pdata->same_rate | pdata->wide; + return 0; +} + +#define S6000_I2S_RATES (SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_5512 | \ + SNDRV_PCM_RATE_8000_192000) +#define S6000_I2S_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE) + +static struct snd_soc_dai_ops s6000_i2s_dai_ops = { + .set_fmt = s6000_i2s_set_dai_fmt, + .set_clkdiv = s6000_i2s_set_clkdiv, + .hw_params = s6000_i2s_hw_params, +}; + +struct snd_soc_dai s6000_i2s_dai = { + .name = "s6000-i2s", + .id = 0, + .probe = s6000_i2s_dai_probe, + .playback = { + .channels_min = 2, + .channels_max = 8, + .formats = S6000_I2S_FORMATS, + .rates = S6000_I2S_RATES, + .rate_min = 0, + .rate_max = 1562500, + }, + .capture = { + .channels_min = 2, + .channels_max = 8, + .formats = S6000_I2S_FORMATS, + .rates = S6000_I2S_RATES, + .rate_min = 0, + .rate_max = 1562500, + }, + .ops = &s6000_i2s_dai_ops, +} +EXPORT_SYMBOL_GPL(s6000_i2s_dai); + +static int __devinit s6000_i2s_probe(struct platform_device *pdev) +{ + struct s6000_i2s_dev *dev; + struct resource *scbmem, *sifmem, *region, *dma1, *dma2; + u8 __iomem *mmio; + int ret; + + scbmem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!scbmem) { + dev_err(&pdev->dev, "no mem resource?\n"); + ret = -ENODEV; + goto err_release_none; + } + + region = request_mem_region(scbmem->start, + scbmem->end - scbmem->start + 1, + pdev->name); + if (!region) { + dev_err(&pdev->dev, "I2S SCB region already claimed\n"); + ret = -EBUSY; + goto err_release_none; + } + + mmio = ioremap(scbmem->start, scbmem->end - scbmem->start + 1); + if (!mmio) { + dev_err(&pdev->dev, "can't ioremap SCB region\n"); + ret = -ENOMEM; + goto err_release_scb; + } + + sifmem = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!sifmem) { + dev_err(&pdev->dev, "no second mem resource?\n"); + ret = -ENODEV; + goto err_release_map; + } + + region = request_mem_region(sifmem->start, + sifmem->end - sifmem->start + 1, + pdev->name); + if (!region) { + dev_err(&pdev->dev, "I2S SIF region already claimed\n"); + ret = -EBUSY; + goto err_release_map; + } + + dma1 = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!dma1) { + dev_err(&pdev->dev, "no dma resource?\n"); + ret = -ENODEV; + goto err_release_sif; + } + + region = request_mem_region(dma1->start, dma1->end - dma1->start + 1, + pdev->name); + if (!region) { + dev_err(&pdev->dev, "I2S DMA region already claimed\n"); + ret = -EBUSY; + goto err_release_sif; + } + + dma2 = platform_get_resource(pdev, IORESOURCE_DMA, 1); + if (dma2) { + region = request_mem_region(dma2->start, + dma2->end - dma2->start + 1, + pdev->name); + if (!region) { + dev_err(&pdev->dev, + "I2S DMA region already claimed\n"); + ret = -EBUSY; + goto err_release_dma1; + } + } + + dev = kzalloc(sizeof(struct s6000_i2s_dev), GFP_KERNEL); + if (!dev) { + ret = -ENOMEM; + goto err_release_dma2; + } + + s6000_i2s_dai.dev = &pdev->dev; + s6000_i2s_dai.private_data = dev; + s6000_i2s_dai.dma_data = &dev->dma_params; + + dev->sifbase = sifmem->start; + dev->scbbase = mmio; + + s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE, 0); + s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_CLEAR, + S6_I2S_INT_ALIGNMENT | + S6_I2S_INT_UNDERRUN | + S6_I2S_INT_OVERRUN); + + s6000_i2s_stop_channel(dev, 0); + s6000_i2s_stop_channel(dev, 1); + s6000_i2s_wait_disabled(dev); + + dev->dma_params.check_xrun = s6000_i2s_check_xrun; + dev->dma_params.trigger = s6000_i2s_trigger; + dev->dma_params.dma_in = dma1->start; + dev->dma_params.dma_out = dma2 ? dma2->start : 0; + dev->dma_params.irq = platform_get_irq(pdev, 0); + if (dev->dma_params.irq < 0) { + dev_err(&pdev->dev, "no irq resource?\n"); + ret = -ENODEV; + goto err_release_dev; + } + + s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE, + S6_I2S_INT_ALIGNMENT | + S6_I2S_INT_UNDERRUN | + S6_I2S_INT_OVERRUN); + + ret = snd_soc_register_dai(&s6000_i2s_dai); + if (ret) + goto err_release_dev; + + return 0; + +err_release_dev: + kfree(dev); +err_release_dma2: + if (dma2) + release_mem_region(dma2->start, dma2->end - dma2->start + 1); +err_release_dma1: + release_mem_region(dma1->start, dma1->end - dma1->start + 1); +err_release_sif: + release_mem_region(sifmem->start, (sifmem->end - sifmem->start) + 1); +err_release_map: + iounmap(mmio); +err_release_scb: + release_mem_region(scbmem->start, (scbmem->end - scbmem->start) + 1); +err_release_none: + return ret; +} + +static void __devexit s6000_i2s_remove(struct platform_device *pdev) +{ + struct s6000_i2s_dev *dev = s6000_i2s_dai.private_data; + struct resource *region; + void __iomem *mmio = dev->scbbase; + + snd_soc_unregister_dai(&s6000_i2s_dai); + + s6000_i2s_stop_channel(dev, 0); + s6000_i2s_stop_channel(dev, 1); + + s6_i2s_write_reg(dev, S6_I2S_INTERRUPT_ENABLE, 0); + s6000_i2s_dai.private_data = 0; + kfree(dev); + + region = platform_get_resource(pdev, IORESOURCE_DMA, 0); + release_mem_region(region->start, region->end - region->start + 1); + + region = platform_get_resource(pdev, IORESOURCE_DMA, 1); + if (region) + release_mem_region(region->start, + region->end - region->start + 1); + + region = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(region->start, (region->end - region->start) + 1); + + iounmap(mmio); + region = platform_get_resource(pdev, IORESOURCE_IO, 0); + release_mem_region(region->start, (region->end - region->start) + 1); +} + +static struct platform_driver s6000_i2s_driver = { + .probe = s6000_i2s_probe, + .remove = __devexit_p(s6000_i2s_remove), + .driver = { + .name = "s6000-i2s", + .owner = THIS_MODULE, + }, +}; + +static int __init s6000_i2s_init(void) +{ + return platform_driver_register(&s6000_i2s_driver); +} +module_init(s6000_i2s_init); + +static void __exit s6000_i2s_exit(void) +{ + platform_driver_unregister(&s6000_i2s_driver); +} +module_exit(s6000_i2s_exit); + +MODULE_AUTHOR("Daniel Gloeckner"); +MODULE_DESCRIPTION("Stretch s6000 family I2S SoC Interface"); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/s6000/s6000-i2s.h b/sound/soc/s6000/s6000-i2s.h new file mode 100644 index 0000000..2375fdf --- /dev/null +++ b/sound/soc/s6000/s6000-i2s.h @@ -0,0 +1,25 @@ +/* + * ALSA SoC I2S Audio Layer for the Stretch s6000 family + * + * Author: Daniel Gloeckner, + * Copyright: (C) 2009 emlix GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _S6000_I2S_H +#define _S6000_I2S_H + +extern struct snd_soc_dai s6000_i2s_dai; + +struct s6000_snd_platform_data { + int lines_in; + int lines_out; + int channel_in; + int channel_out; + int wide; + int same_rate; +}; +#endif diff --git a/sound/soc/s6000/s6000-pcm.c b/sound/soc/s6000/s6000-pcm.c new file mode 100644 index 0000000..83b8028 --- /dev/null +++ b/sound/soc/s6000/s6000-pcm.c @@ -0,0 +1,497 @@ +/* + * ALSA PCM interface for the Stetch s6000 family + * + * Author: Daniel Gloeckner, + * Copyright: (C) 2009 emlix GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include "s6000-pcm.h" + +#define S6_PCM_PREALLOCATE_SIZE (96 * 1024) +#define S6_PCM_PREALLOCATE_MAX (2048 * 1024) + +static struct snd_pcm_hardware s6000_pcm_hardware = { + .info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_PAUSE | SNDRV_PCM_INFO_JOINT_DUPLEX), + .formats = (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S32_LE), + .rates = (SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_5512 | \ + SNDRV_PCM_RATE_8000_192000), + .rate_min = 0, + .rate_max = 1562500, + .channels_min = 2, + .channels_max = 8, + .buffer_bytes_max = 0x7ffffff0, + .period_bytes_min = 16, + .period_bytes_max = 0xfffff0, + .periods_min = 2, + .periods_max = 1024, /* no limit */ + .fifo_size = 0, +}; + +struct s6000_runtime_data { + spinlock_t lock; + int period; /* current DMA period */ +}; + +static void s6000_pcm_enqueue_dma(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct s6000_runtime_data *prtd = runtime->private_data; + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + int channel; + unsigned int period_size; + unsigned int dma_offset; + dma_addr_t dma_pos; + dma_addr_t src, dst; + + period_size = snd_pcm_lib_period_bytes(substream); + dma_offset = prtd->period * period_size; + dma_pos = runtime->dma_addr + dma_offset; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + src = dma_pos; + dst = par->sif_out; + channel = par->dma_out; + } else { + src = par->sif_in; + dst = dma_pos; + channel = par->dma_in; + } + + if (!s6dmac_channel_enabled(DMA_MASK_DMAC(channel), + DMA_INDEX_CHNL(channel))) + return; + + if (s6dmac_fifo_full(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel))) { + printk(KERN_ERR "s6000-pcm: fifo full\n"); + return; + } + + BUG_ON(period_size & 15); + s6dmac_put_fifo(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel), + src, dst, period_size); + + prtd->period++; + if (unlikely(prtd->period >= runtime->periods)) + prtd->period = 0; +} + +static irqreturn_t s6000_pcm_irq(int irq, void *data) +{ + struct snd_pcm *pcm = data; + struct snd_soc_pcm_runtime *runtime = pcm->private_data; + struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data; + struct s6000_runtime_data *prtd; + unsigned int has_xrun; + int i, ret = IRQ_NONE; + u32 channel[2] = { + [SNDRV_PCM_STREAM_PLAYBACK] = params->dma_out, + [SNDRV_PCM_STREAM_CAPTURE] = params->dma_in + }; + + has_xrun = params->check_xrun(runtime->dai->cpu_dai); + + for (i = 0; i < ARRAY_SIZE(channel); ++i) { + struct snd_pcm_substream *substream = pcm->streams[i].substream; + unsigned int pending; + + if (!channel[i]) + continue; + + if (unlikely(has_xrun & (1 << i)) && + substream->runtime && + snd_pcm_running(substream)) { + dev_dbg(pcm->dev, "xrun\n"); + snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); + ret = IRQ_HANDLED; + } + + pending = s6dmac_int_sources(DMA_MASK_DMAC(channel[i]), + DMA_INDEX_CHNL(channel[i])); + + if (pending & 1) { + ret = IRQ_HANDLED; + if (likely(substream->runtime && + snd_pcm_running(substream))) { + snd_pcm_period_elapsed(substream); + dev_dbg(pcm->dev, "period elapsed %x %x\n", + s6dmac_cur_src(DMA_MASK_DMAC(channel[i]), + DMA_INDEX_CHNL(channel[i])), + s6dmac_cur_dst(DMA_MASK_DMAC(channel[i]), + DMA_INDEX_CHNL(channel[i]))); + prtd = substream->runtime->private_data; + spin_lock(&prtd->lock); + s6000_pcm_enqueue_dma(substream); + spin_unlock(&prtd->lock); + } + } + + if (unlikely(pending & ~7)) { + if (pending & (1 << 3)) + printk(KERN_WARNING + "s6000-pcm: DMA %x Underflow\n", + channel[i]); + if (pending & (1 << 4)) + printk(KERN_WARNING + "s6000-pcm: DMA %x Overflow\n", + channel[i]); + if (pending & 0x1e0) + printk(KERN_WARNING + "s6000-pcm: DMA %x Master Error " + "(mask %x)\n", + channel[i], pending >> 5); + + } + } + + return ret; +} + +static int s6000_pcm_start(struct snd_pcm_substream *substream) +{ + struct s6000_runtime_data *prtd = substream->runtime->private_data; + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + unsigned long flags; + int srcinc; + u32 dma; + + spin_lock_irqsave(&prtd->lock, flags); + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { + srcinc = 1; + dma = par->dma_out; + } else { + srcinc = 0; + dma = par->dma_in; + } + s6dmac_enable_chan(DMA_MASK_DMAC(dma), DMA_INDEX_CHNL(dma), + 1 /* priority 1 (0 is max) */, + 0 /* peripheral requests w/o xfer length mode */, + srcinc /* source address increment */, + srcinc^1 /* destination address increment */, + 0 /* chunksize 0 (skip impossible on this dma) */, + 0 /* source skip after chunk (impossible) */, + 0 /* destination skip after chunk (impossible) */, + 4 /* 16 byte burst size */, + -1 /* don't conserve bandwidth */, + 0 /* low watermark irq descriptor theshold */, + 0 /* disable hardware timestamps */, + 1 /* enable channel */); + + s6000_pcm_enqueue_dma(substream); + s6000_pcm_enqueue_dma(substream); + + spin_unlock_irqrestore(&prtd->lock, flags); + + return 0; +} + +static int s6000_pcm_stop(struct snd_pcm_substream *substream) +{ + struct s6000_runtime_data *prtd = substream->runtime->private_data; + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + unsigned long flags; + u32 channel; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + channel = par->dma_out; + else + channel = par->dma_in; + + s6dmac_set_terminal_count(DMA_MASK_DMAC(channel), + DMA_INDEX_CHNL(channel), 0); + + spin_lock_irqsave(&prtd->lock, flags); + + s6dmac_disable_chan(DMA_MASK_DMAC(channel), DMA_INDEX_CHNL(channel)); + + spin_unlock_irqrestore(&prtd->lock, flags); + + return 0; +} + +static int s6000_pcm_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + int ret; + + ret = par->trigger(substream, cmd, 0); + if (ret < 0) + return ret; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + ret = s6000_pcm_start(substream); + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + ret = s6000_pcm_stop(substream); + break; + default: + ret = -EINVAL; + } + if (ret < 0) + return ret; + + return par->trigger(substream, cmd, 1); +} + +static int s6000_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct s6000_runtime_data *prtd = substream->runtime->private_data; + + prtd->period = 0; + + return 0; +} + +static snd_pcm_uframes_t s6000_pcm_pointer(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + struct snd_pcm_runtime *runtime = substream->runtime; + struct s6000_runtime_data *prtd = runtime->private_data; + unsigned long flags; + unsigned int offset; + dma_addr_t count; + + spin_lock_irqsave(&prtd->lock, flags); + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + count = s6dmac_cur_src(DMA_MASK_DMAC(par->dma_out), + DMA_INDEX_CHNL(par->dma_out)); + else + count = s6dmac_cur_dst(DMA_MASK_DMAC(par->dma_in), + DMA_INDEX_CHNL(par->dma_in)); + + count -= runtime->dma_addr; + + spin_unlock_irqrestore(&prtd->lock, flags); + + offset = bytes_to_frames(runtime, count); + if (unlikely(offset >= runtime->buffer_size)) + offset = 0; + + return offset; +} + +static int s6000_pcm_open(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + struct snd_pcm_runtime *runtime = substream->runtime; + struct s6000_runtime_data *prtd; + int ret; + + snd_soc_set_runtime_hwparams(substream, &s6000_pcm_hardware); + + ret = snd_pcm_hw_constraint_step(runtime, 0, + SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 16); + if (ret < 0) + return ret; + ret = snd_pcm_hw_constraint_step(runtime, 0, + SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 16); + if (ret < 0) + return ret; + ret = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (ret < 0) + return ret; + + if (par->same_rate) { + int rate; + spin_lock(&par->lock); /* needed? */ + rate = par->rate; + spin_unlock(&par->lock); + if (rate != -1) { + ret = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_RATE, + rate, rate); + if (ret < 0) + return ret; + } + } + + prtd = kzalloc(sizeof(struct s6000_runtime_data), GFP_KERNEL); + if (prtd == NULL) + return -ENOMEM; + + spin_lock_init(&prtd->lock); + + runtime->private_data = prtd; + + return 0; +} + +static int s6000_pcm_close(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct s6000_runtime_data *prtd = runtime->private_data; + + kfree(prtd); + + return 0; +} + +static int s6000_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + int ret; + ret = snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(hw_params)); + if (ret < 0) { + printk(KERN_WARNING "s6000-pcm: allocation of memory failed\n"); + return ret; + } + + if (par->same_rate) { + spin_lock(&par->lock); + if (par->rate == -1 || + !(par->in_use & ~(1 << substream->stream))) { + par->rate = params_rate(hw_params); + par->in_use |= 1 << substream->stream; + } else if (params_rate(hw_params) != par->rate) { + snd_pcm_lib_free_pages(substream); + par->in_use &= ~(1 << substream->stream); + ret = -EBUSY; + } + spin_unlock(&par->lock); + } + return ret; +} + +static int s6000_pcm_hw_free(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *soc_runtime = substream->private_data; + struct s6000_pcm_dma_params *par = soc_runtime->dai->cpu_dai->dma_data; + + spin_lock(&par->lock); + par->in_use &= ~(1 << substream->stream); + if (!par->in_use) + par->rate = -1; + spin_unlock(&par->lock); + + return snd_pcm_lib_free_pages(substream); +} + +static struct snd_pcm_ops s6000_pcm_ops = { + .open = s6000_pcm_open, + .close = s6000_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .hw_params = s6000_pcm_hw_params, + .hw_free = s6000_pcm_hw_free, + .trigger = s6000_pcm_trigger, + .prepare = s6000_pcm_prepare, + .pointer = s6000_pcm_pointer, +}; + +static void s6000_pcm_free(struct snd_pcm *pcm) +{ + struct snd_soc_pcm_runtime *runtime = pcm->private_data; + struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data; + + free_irq(params->irq, pcm); + snd_pcm_lib_preallocate_free_for_all(pcm); +} + +static u64 s6000_pcm_dmamask = DMA_32BIT_MASK; + +static int s6000_pcm_new(struct snd_card *card, + struct snd_soc_dai *dai, struct snd_pcm *pcm) +{ + struct snd_soc_pcm_runtime *runtime = pcm->private_data; + struct s6000_pcm_dma_params *params = runtime->dai->cpu_dai->dma_data; + int res; + + if (!card->dev->dma_mask) + card->dev->dma_mask = &s6000_pcm_dmamask; + if (!card->dev->coherent_dma_mask) + card->dev->coherent_dma_mask = DMA_32BIT_MASK; + + if (params->dma_in) { + s6dmac_disable_chan(DMA_MASK_DMAC(params->dma_in), + DMA_INDEX_CHNL(params->dma_in)); + s6dmac_int_sources(DMA_MASK_DMAC(params->dma_in), + DMA_INDEX_CHNL(params->dma_in)); + } + + if (params->dma_out) { + s6dmac_disable_chan(DMA_MASK_DMAC(params->dma_out), + DMA_INDEX_CHNL(params->dma_out)); + s6dmac_int_sources(DMA_MASK_DMAC(params->dma_out), + DMA_INDEX_CHNL(params->dma_out)); + } + + res = request_irq(params->irq, s6000_pcm_irq, IRQF_SHARED, + s6000_soc_platform.name, pcm); + if (res) { + printk(KERN_ERR "s6000-pcm couldn't get IRQ\n"); + return res; + } + + res = snd_pcm_lib_preallocate_pages_for_all(pcm, + SNDRV_DMA_TYPE_DEV, + card->dev, + S6_PCM_PREALLOCATE_SIZE, + S6_PCM_PREALLOCATE_MAX); + if (res) + printk(KERN_WARNING "s6000-pcm: preallocation failed\n"); + + spin_lock_init(¶ms->lock); + params->in_use = 0; + params->rate = -1; + return 0; +} + +struct snd_soc_platform s6000_soc_platform = { + .name = "s6000-audio", + .pcm_ops = &s6000_pcm_ops, + .pcm_new = s6000_pcm_new, + .pcm_free = s6000_pcm_free, +}; +EXPORT_SYMBOL_GPL(s6000_soc_platform); + +static int __init s6000_pcm_init(void) +{ + return snd_soc_register_platform(&s6000_soc_platform); +} +module_init(s6000_pcm_init); + +static void __exit s6000_pcm_exit(void) +{ + snd_soc_unregister_platform(&s6000_soc_platform); +} +module_exit(s6000_pcm_exit); + +MODULE_AUTHOR("Daniel Gloeckner"); +MODULE_DESCRIPTION("Stretch s6000 family PCM DMA module"); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/s6000/s6000-pcm.h b/sound/soc/s6000/s6000-pcm.h new file mode 100644 index 0000000..96f23f6 --- /dev/null +++ b/sound/soc/s6000/s6000-pcm.h @@ -0,0 +1,35 @@ +/* + * ALSA PCM interface for the Stretch s6000 family + * + * Author: Daniel Gloeckner, + * Copyright: (C) 2009 emlix GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _S6000_PCM_H +#define _S6000_PCM_H + +struct snd_soc_dai; +struct snd_pcm_substream; + +struct s6000_pcm_dma_params { + unsigned int (*check_xrun)(struct snd_soc_dai *cpu_dai); + int (*trigger)(struct snd_pcm_substream *substream, int cmd, int after); + dma_addr_t sif_in; + dma_addr_t sif_out; + u32 dma_in; + u32 dma_out; + int irq; + int same_rate; + + spinlock_t lock; + int in_use; + int rate; +}; + +extern struct snd_soc_platform s6000_soc_platform; + +#endif -- cgit v0.10.2 From 2b7dbbe0c9491e62b50978d1615193bec33a8291 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Sat, 28 Mar 2009 19:47:02 +0100 Subject: ASoC: s6105 IP camera machine specific ASoC code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds machine specific code for the audio part of the Stretch s6105 IP camera reference design. The device uses the tlv320aic31(01) codec to generate the clock for both I2S ports of the soc. While the master clock is generated by a configurable PLL chip, the code assumes the factory default settings. An additional kcontrol has been added to handle the special routing of the board, connecting both HPLCOM and HPROUT to the same pin of the audio jack. One of these should always be switched off. Signed-off-by: Daniel Glöckner Signed-off-by: Mark Brown diff --git a/sound/soc/s6000/Kconfig b/sound/soc/s6000/Kconfig index 4bfc8bc..c74eb3d 100644 --- a/sound/soc/s6000/Kconfig +++ b/sound/soc/s6000/Kconfig @@ -8,3 +8,12 @@ config SND_S6000_SOC config SND_S6000_SOC_I2S tristate + +config SND_S6000_SOC_S6IPCAM + tristate "SoC Audio support for Stretch 6105 IP Camera" + depends on SND_S6000_SOC && XTENSA_PLATFORM_S6105 + select SND_S6000_SOC_I2S + select SND_SOC_TLV320AIC3X + help + Say Y if you want to add support for SoC audio on the + Stretch s6105 IP Camera Reference Design. diff --git a/sound/soc/s6000/Makefile b/sound/soc/s6000/Makefile index df15f87..7a61361 100644 --- a/sound/soc/s6000/Makefile +++ b/sound/soc/s6000/Makefile @@ -4,3 +4,8 @@ snd-soc-s6000-i2s-objs := s6000-i2s.o obj-$(CONFIG_SND_S6000_SOC) += snd-soc-s6000.o obj-$(CONFIG_SND_S6000_SOC_I2S) += snd-soc-s6000-i2s.o + +# s6105 Machine Support +snd-soc-s6ipcam-objs := s6105-ipcam.o + +obj-$(CONFIG_SND_S6000_SOC_S6IPCAM) += snd-soc-s6ipcam.o diff --git a/sound/soc/s6000/s6105-ipcam.c b/sound/soc/s6000/s6105-ipcam.c new file mode 100644 index 0000000..21c4f55 --- /dev/null +++ b/sound/soc/s6000/s6105-ipcam.c @@ -0,0 +1,244 @@ +/* + * ASoC driver for Stretch s6105 IP camera platform + * + * Author: Daniel Gloeckner, + * Copyright: (C) 2009 emlix GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../codecs/tlv320aic3x.h" +#include "s6000-pcm.h" +#include "s6000-i2s.h" + +#define S6105_CAM_CODEC_CLOCK 12288000 + +static int s6105_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai = rtd->dai->codec_dai; + struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; + int ret = 0; + + /* set codec DAI configuration */ + ret = snd_soc_dai_set_fmt(codec_dai, SND_SOC_DAIFMT_I2S | + SND_SOC_DAIFMT_CBM_CFM); + if (ret < 0) + return ret; + + /* set cpu DAI configuration */ + ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_CBM_CFM | + SND_SOC_DAIFMT_IB_IF); + if (ret < 0) + return ret; + + /* set the codec system clock */ + ret = snd_soc_dai_set_sysclk(codec_dai, 0, S6105_CAM_CODEC_CLOCK, + SND_SOC_CLOCK_OUT); + if (ret < 0) + return ret; + + return 0; +} + +static struct snd_soc_ops s6105_ops = { + .hw_params = s6105_hw_params, +}; + +/* s6105 machine dapm widgets */ +static const struct snd_soc_dapm_widget aic3x_dapm_widgets[] = { + SND_SOC_DAPM_LINE("Audio Out Differential", NULL), + SND_SOC_DAPM_LINE("Audio Out Stereo", NULL), + SND_SOC_DAPM_LINE("Audio In", NULL), +}; + +/* s6105 machine audio_mapnections to the codec pins */ +static const struct snd_soc_dapm_route audio_map[] = { + /* Audio Out connected to HPLOUT, HPLCOM, HPROUT */ + {"Audio Out Differential", NULL, "HPLOUT"}, + {"Audio Out Differential", NULL, "HPLCOM"}, + {"Audio Out Stereo", NULL, "HPLOUT"}, + {"Audio Out Stereo", NULL, "HPROUT"}, + + /* Audio In connected to LINE1L, LINE1R */ + {"LINE1L", NULL, "Audio In"}, + {"LINE1R", NULL, "Audio In"}, +}; + +static int output_type_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; + uinfo->count = 1; + uinfo->value.enumerated.items = 2; + if (uinfo->value.enumerated.item) { + uinfo->value.enumerated.item = 1; + strcpy(uinfo->value.enumerated.name, "HPLOUT/HPROUT"); + } else { + strcpy(uinfo->value.enumerated.name, "HPLOUT/HPLCOM"); + } + return 0; +} + +static int output_type_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + ucontrol->value.enumerated.item[0] = kcontrol->private_value; + return 0; +} + +static int output_type_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = kcontrol->private_data; + unsigned int val = (ucontrol->value.enumerated.item[0] != 0); + char *differential = "Audio Out Differential"; + char *stereo = "Audio Out Stereo"; + + if (kcontrol->private_value == val) + return 0; + kcontrol->private_value = val; + snd_soc_dapm_disable_pin(codec, val ? differential : stereo); + snd_soc_dapm_sync(codec); + snd_soc_dapm_enable_pin(codec, val ? stereo : differential); + snd_soc_dapm_sync(codec); + + return 1; +} + +static const struct snd_kcontrol_new audio_out_mux = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Master Output Mux", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .info = output_type_info, + .get = output_type_get, + .put = output_type_put, + .private_value = 1 /* default to stereo */ +}; + +/* Logic for a aic3x as connected on the s6105 ip camera ref design */ +static int s6105_aic3x_init(struct snd_soc_codec *codec) +{ + /* Add s6105 specific widgets */ + snd_soc_dapm_new_controls(codec, aic3x_dapm_widgets, + ARRAY_SIZE(aic3x_dapm_widgets)); + + /* Set up s6105 specific audio path audio_map */ + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); + + /* not present */ + snd_soc_dapm_nc_pin(codec, "MONO_LOUT"); + snd_soc_dapm_nc_pin(codec, "LINE2L"); + snd_soc_dapm_nc_pin(codec, "LINE2R"); + + /* not connected */ + snd_soc_dapm_nc_pin(codec, "MIC3L"); /* LINE2L on this chip */ + snd_soc_dapm_nc_pin(codec, "MIC3R"); /* LINE2R on this chip */ + snd_soc_dapm_nc_pin(codec, "LLOUT"); + snd_soc_dapm_nc_pin(codec, "RLOUT"); + snd_soc_dapm_nc_pin(codec, "HPRCOM"); + + /* always connected */ + snd_soc_dapm_enable_pin(codec, "Audio In"); + + /* must correspond to audio_out_mux.private_value initializer */ + snd_soc_dapm_disable_pin(codec, "Audio Out Differential"); + snd_soc_dapm_sync(codec); + snd_soc_dapm_enable_pin(codec, "Audio Out Stereo"); + + snd_soc_dapm_sync(codec); + + snd_ctl_add(codec->card, snd_ctl_new1(&audio_out_mux, codec)); + + return 0; +} + +/* s6105 digital audio interface glue - connects codec <--> CPU */ +static struct snd_soc_dai_link s6105_dai = { + .name = "TLV320AIC31", + .stream_name = "AIC31", + .cpu_dai = &s6000_i2s_dai, + .codec_dai = &aic3x_dai, + .init = s6105_aic3x_init, + .ops = &s6105_ops, +}; + +/* s6105 audio machine driver */ +static struct snd_soc_card snd_soc_card_s6105 = { + .name = "Stretch IP Camera", + .platform = &s6000_soc_platform, + .dai_link = &s6105_dai, + .num_links = 1, +}; + +/* s6105 audio private data */ +static struct aic3x_setup_data s6105_aic3x_setup = { + .i2c_bus = 0, + .i2c_address = 0x18, +}; + +/* s6105 audio subsystem */ +static struct snd_soc_device s6105_snd_devdata = { + .card = &snd_soc_card_s6105, + .codec_dev = &soc_codec_dev_aic3x, + .codec_data = &s6105_aic3x_setup, +}; + +static struct s6000_snd_platform_data __initdata s6105_snd_data = { + .wide = 0, + .channel_in = 0, + .channel_out = 1, + .lines_in = 1, + .lines_out = 1, + .same_rate = 1, +}; + +static struct platform_device *s6105_snd_device; + +static int __init s6105_init(void) +{ + int ret; + + s6105_snd_device = platform_device_alloc("soc-audio", -1); + if (!s6105_snd_device) + return -ENOMEM; + + platform_set_drvdata(s6105_snd_device, &s6105_snd_devdata); + s6105_snd_devdata.dev = &s6105_snd_device->dev; + platform_device_add_data(s6105_snd_device, &s6105_snd_data, + sizeof(s6105_snd_data)); + + ret = platform_device_add(s6105_snd_device); + if (ret) + platform_device_put(s6105_snd_device); + + return ret; +} + +static void __exit s6105_exit(void) +{ + platform_device_unregister(s6105_snd_device); +} + +module_init(s6105_init); +module_exit(s6105_exit); + +MODULE_AUTHOR("Daniel Gloeckner"); +MODULE_DESCRIPTION("Stretch s6105 IP camera ASoC driver"); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 595258aaeac4cc6e187b98b1bf29bb176febe763 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:28 +0100 Subject: perf_counter: x86: fix 32-bit irq_period assumption No need to assume the irq_period is 32bit. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 155bc3c..1cedc34 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -449,7 +449,7 @@ __hw_perf_counter_set_period(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); - s32 period = hwc->irq_period; + s64 period = hwc->irq_period; int err; /* -- cgit v0.10.2 From 755642322aa66fbc5421a35fd3e1733f73e20083 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:29 +0100 Subject: perf_counter: use list_move_tail() Instead of del/add use a move list-op. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b2e8389..0fe22c9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -89,8 +89,7 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_for_each_entry_safe(sibling, tmp, &counter->sibling_list, list_entry) { - list_del_init(&sibling->list_entry); - list_add_tail(&sibling->list_entry, &ctx->counter_list); + list_move_tail(&sibling->list_entry, &ctx->counter_list); sibling->group_leader = sibling; } } @@ -959,8 +958,7 @@ static void rotate_ctx(struct perf_counter_context *ctx) */ perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - list_del(&counter->list_entry); - list_add_tail(&counter->list_entry, &ctx->counter_list); + list_move_tail(&counter->list_entry, &ctx->counter_list); break; } hw_perf_restore(perf_flags); -- cgit v0.10.2 From 60b3df9c1e24a18aabb412da9905208c5f04ebea Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:30 +0100 Subject: perf_counter: add comment to barrier We need to ensure the enabled=0 write happens before we start disabling the actual counters, so that a pcm_amd_enable() will not enable one underneath us. I think the race is impossible anyway, we always balance the ops within any one context and perform enable() with IRQs disabled. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 1cedc34..a2e3b76 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -247,6 +247,10 @@ static u64 pmc_amd_save_disable_all(void) enabled = cpuc->enabled; cpuc->enabled = 0; + /* + * ensure we write the disable before we start disabling the + * counters proper, so that pcm_amd_enable() does the right thing. + */ barrier(); for (idx = 0; idx < nr_counters_generic; idx++) { -- cgit v0.10.2 From 82bae4f8c2fd64a2bb1e2e72c508853ed2b4a299 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:31 +0100 Subject: perf_counter: x86: use ULL postfix for 64bit constants Fix a build warning on 32bit machines by explicitly marking the constants as 64-bit. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a2e3b76..22dab06 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -84,9 +84,9 @@ static u64 pmc_intel_event_map(int event) static u64 pmc_intel_raw_event(u64 event) { -#define CORE_EVNTSEL_EVENT_MASK 0x000000FF -#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00 -#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000 +#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL +#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ @@ -116,9 +116,9 @@ static u64 pmc_amd_event_map(int event) static u64 pmc_amd_raw_event(u64 event) { -#define K7_EVNTSEL_EVENT_MASK 0x7000000FF -#define K7_EVNTSEL_UNIT_MASK 0x00000FF00 -#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000 +#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL +#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL +#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL #define K7_EVNTSEL_MASK \ (K7_EVNTSEL_EVENT_MASK | \ -- cgit v0.10.2 From 15dbf27cc18559a14e99609f78678aa86b9c6ff1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:32 +0100 Subject: perf_counter: software counter event infrastructure Provide generic software counter infrastructure that supports software events. This will be used to allow sample based profiling based on software events such as pagefaults. The current infrastructure can only provide a count of such events, no place information. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index dde5645..3fefc3b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -126,6 +126,7 @@ struct hw_perf_counter { unsigned long counter_base; int nmi; unsigned int idx; + atomic64_t count; /* software */ atomic64_t prev_count; u64 irq_period; atomic64_t period_left; @@ -283,6 +284,8 @@ static inline int is_software_counter(struct perf_counter *counter) return !counter->hw_event.raw && counter->hw_event.type < 0; } +extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -295,10 +298,13 @@ static inline void perf_counter_exit_task(struct task_struct *child) { } static inline void perf_counter_notify(struct pt_regs *regs) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } -static inline void hw_perf_restore(u64 ctrl) { } +static inline void hw_perf_restore(u64 ctrl) { } static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } + +static inline void perf_swcounter_event(enum hw_event_types event, u64 nr, + int nmi, struct pt_regs *regs) { } #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0fe22c9..eeb1b46 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1328,6 +1328,185 @@ static const struct file_operations perf_fops = { .compat_ioctl = perf_ioctl, }; +/* + * Generic software counter infrastructure + */ + +static void perf_swcounter_update(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + u64 prev, now; + s64 delta; + +again: + prev = atomic64_read(&hwc->prev_count); + now = atomic64_read(&hwc->count); + if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) + goto again; + + delta = now - prev; + + atomic64_add(delta, &counter->count); + atomic64_sub(delta, &hwc->period_left); +} + +static void perf_swcounter_set_period(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + s64 left = atomic64_read(&hwc->period_left); + s64 period = hwc->irq_period; + + if (unlikely(left <= -period)) { + left = period; + atomic64_set(&hwc->period_left, left); + } + + if (unlikely(left <= 0)) { + left += period; + atomic64_add(period, &hwc->period_left); + } + + atomic64_set(&hwc->prev_count, -left); + atomic64_set(&hwc->count, -left); +} + +static void perf_swcounter_save_and_restart(struct perf_counter *counter) +{ + perf_swcounter_update(counter); + perf_swcounter_set_period(counter); +} + +static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data) +{ + struct perf_data *irqdata = counter->irqdata; + + if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { + irqdata->overrun++; + } else { + u64 *p = (u64 *) &irqdata->data[irqdata->len]; + + *p = data; + irqdata->len += sizeof(u64); + } +} + +static void perf_swcounter_handle_group(struct perf_counter *sibling) +{ + struct perf_counter *counter, *group_leader = sibling->group_leader; + + list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { + perf_swcounter_update(counter); + perf_swcounter_store_irq(sibling, counter->hw_event.type); + perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); + } +} + +static void perf_swcounter_interrupt(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + perf_swcounter_save_and_restart(counter); + + switch (counter->hw_event.record_type) { + case PERF_RECORD_SIMPLE: + break; + + case PERF_RECORD_IRQ: + perf_swcounter_store_irq(counter, instruction_pointer(regs)); + break; + + case PERF_RECORD_GROUP: + perf_swcounter_handle_group(counter); + break; + } + + if (nmi) { + counter->wakeup_pending = 1; + set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + } else + wake_up(&counter->waitq); +} + +static int perf_swcounter_match(struct perf_counter *counter, + enum hw_event_types event, + struct pt_regs *regs) +{ + if (counter->state != PERF_COUNTER_STATE_ACTIVE) + return 0; + + if (counter->hw_event.raw) + return 0; + + if (counter->hw_event.type != event) + return 0; + + if (counter->hw_event.exclude_user && user_mode(regs)) + return 0; + + if (counter->hw_event.exclude_kernel && !user_mode(regs)) + return 0; + + return 1; +} + +static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, + enum hw_event_types event, u64 nr, + int nmi, struct pt_regs *regs) +{ + struct perf_counter *counter; + unsigned long flags; + int neg; + + if (list_empty(&ctx->counter_list)) + return; + + spin_lock_irqsave(&ctx->lock, flags); + + /* + * XXX: make counter_list RCU safe + */ + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (perf_swcounter_match(counter, event, regs)) { + neg = atomic64_add_negative(nr, &counter->hw.count); + if (counter->hw.irq_period && !neg) + perf_swcounter_interrupt(counter, nmi, regs); + } + } + + spin_unlock_irqrestore(&ctx->lock, flags); +} + +void perf_swcounter_event(enum hw_event_types event, u64 nr, + int nmi, struct pt_regs *regs) +{ + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + + perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs); + if (cpuctx->task_ctx) + perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs); + + put_cpu_var(perf_cpu_context); +} + +static void perf_swcounter_read(struct perf_counter *counter) +{ + perf_swcounter_update(counter); +} + +static int perf_swcounter_enable(struct perf_counter *counter) +{ + perf_swcounter_set_period(counter); + return 0; +} + +static void perf_swcounter_disable(struct perf_counter *counter) +{ + perf_swcounter_update(counter); +} + +/* + * Software counter: cpu wall time clock + */ + static int cpu_clock_perf_counter_enable(struct perf_counter *counter) { int cpu = raw_smp_processor_id(); @@ -1365,6 +1544,10 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { }; /* + * Software counter: task time clock + */ + +/* * Called from within the scheduler: */ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update) @@ -1420,6 +1603,10 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { .read = task_clock_perf_counter_read, }; +/* + * Software counter: page faults + */ + #ifdef CONFIG_VM_EVENT_COUNTERS #define cpu_page_faults() __get_cpu_var(vm_event_states).event[PGFAULT] #else @@ -1473,6 +1660,10 @@ static const struct hw_perf_counter_ops perf_ops_page_faults = { .read = page_faults_perf_counter_read, }; +/* + * Software counter: context switches + */ + static u64 get_context_switches(struct perf_counter *counter) { struct task_struct *curr = counter->ctx->task; @@ -1521,6 +1712,10 @@ static const struct hw_perf_counter_ops perf_ops_context_switches = { .read = context_switches_perf_counter_read, }; +/* + * Software counter: cpu migrations + */ + static inline u64 get_cpu_migrations(struct perf_counter *counter) { struct task_struct *curr = counter->ctx->task; @@ -1572,7 +1767,9 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { + struct perf_counter_hw_event *hw_event = &counter->hw_event; const struct hw_perf_counter_ops *hw_ops = NULL; + struct hw_perf_counter *hwc = &counter->hw; /* * Software counters (currently) can't in general distinguish @@ -1618,6 +1815,10 @@ sw_perf_counter_init(struct perf_counter *counter) default: break; } + + if (hw_ops) + hwc->irq_period = hw_event->irq_period; + return hw_ops; } -- cgit v0.10.2 From 7dd1fcc258b65da718f01e4684a7b9244501a9fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:33 +0100 Subject: perf_counter: provide pagefault software events We use the generic software counter infrastructure to provide page fault events. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7699394..eda5b0c 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b727..c872575 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -1044,6 +1045,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + /* * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index eeb1b46..1773c5d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1607,57 +1607,10 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { * Software counter: page faults */ -#ifdef CONFIG_VM_EVENT_COUNTERS -#define cpu_page_faults() __get_cpu_var(vm_event_states).event[PGFAULT] -#else -#define cpu_page_faults() 0 -#endif - -static u64 get_page_faults(struct perf_counter *counter) -{ - struct task_struct *curr = counter->ctx->task; - - if (curr) - return curr->maj_flt + curr->min_flt; - return cpu_page_faults(); -} - -static void page_faults_perf_counter_update(struct perf_counter *counter) -{ - u64 prev, now; - s64 delta; - - prev = atomic64_read(&counter->hw.prev_count); - now = get_page_faults(counter); - - atomic64_set(&counter->hw.prev_count, now); - - delta = now - prev; - - atomic64_add(delta, &counter->count); -} - -static void page_faults_perf_counter_read(struct perf_counter *counter) -{ - page_faults_perf_counter_update(counter); -} - -static int page_faults_perf_counter_enable(struct perf_counter *counter) -{ - if (counter->prev_state <= PERF_COUNTER_STATE_OFF) - atomic64_set(&counter->hw.prev_count, get_page_faults(counter)); - return 0; -} - -static void page_faults_perf_counter_disable(struct perf_counter *counter) -{ - page_faults_perf_counter_update(counter); -} - static const struct hw_perf_counter_ops perf_ops_page_faults = { - .enable = page_faults_perf_counter_enable, - .disable = page_faults_perf_counter_disable, - .read = page_faults_perf_counter_read, + .enable = perf_swcounter_enable, + .disable = perf_swcounter_disable, + .read = perf_swcounter_read, }; /* -- cgit v0.10.2 From ac17dc8e58f3069ea895cfff963adf98ff3cf6b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:34 +0100 Subject: perf_counter: provide major/minor page fault software events Provide separate sw counters for major and minor page faults. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index eda5b0c..17bbf6f 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -312,6 +312,7 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); @@ -319,8 +320,10 @@ good_area: preempt_enable(); } #endif - } else + } else { current->min_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + } up_read(&mm->mmap_sem); return 0; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c872575..f2d3324 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1140,10 +1140,13 @@ good_area: return; } - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + } else { tsk->min_flt++; + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + } check_v8086_mode(regs, address, tsk); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3fefc3b..4b14a8e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -49,8 +49,10 @@ enum hw_event_types { PERF_COUNT_PAGE_FAULTS = -3, PERF_COUNT_CONTEXT_SWITCHES = -4, PERF_COUNT_CPU_MIGRATIONS = -5, + PERF_COUNT_PAGE_FAULTS_MIN = -6, + PERF_COUNT_PAGE_FAULTS_MAJ = -7, - PERF_SW_EVENTS_MIN = -6, + PERF_SW_EVENTS_MIN = -8, }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1773c5d..68950a3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1503,6 +1503,12 @@ static void perf_swcounter_disable(struct perf_counter *counter) perf_swcounter_update(counter); } +static const struct hw_perf_counter_ops perf_ops_generic = { + .enable = perf_swcounter_enable, + .disable = perf_swcounter_disable, + .read = perf_swcounter_read, +}; + /* * Software counter: cpu wall time clock */ @@ -1604,16 +1610,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { }; /* - * Software counter: page faults - */ - -static const struct hw_perf_counter_ops perf_ops_page_faults = { - .enable = perf_swcounter_enable, - .disable = perf_swcounter_disable, - .read = perf_swcounter_read, -}; - -/* * Software counter: context switches */ @@ -1753,9 +1749,9 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_clock; break; case PERF_COUNT_PAGE_FAULTS: - if (!(counter->hw_event.exclude_user || - counter->hw_event.exclude_kernel)) - hw_ops = &perf_ops_page_faults; + case PERF_COUNT_PAGE_FAULTS_MIN: + case PERF_COUNT_PAGE_FAULTS_MAJ: + hw_ops = &perf_ops_generic; break; case PERF_COUNT_CONTEXT_SWITCHES: if (!counter->hw_event.exclude_kernel) -- cgit v0.10.2 From d6d020e9957745c61285ef3da9f294c5e6801f0f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:35 +0100 Subject: perf_counter: hrtimer based sampling for software time events Use hrtimers to profile timer based sampling for the software time counters. This allows platforms without hardware counter support to still perform sample based profiling. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4b14a8e..dfb4c7c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -114,6 +114,7 @@ struct perf_counter_hw_event { #include #include #include +#include #include struct task_struct; @@ -123,12 +124,19 @@ struct task_struct; */ struct hw_perf_counter { #ifdef CONFIG_PERF_COUNTERS - u64 config; - unsigned long config_base; - unsigned long counter_base; - int nmi; - unsigned int idx; - atomic64_t count; /* software */ + union { + struct { /* hardware */ + u64 config; + unsigned long config_base; + unsigned long counter_base; + int nmi; + unsigned int idx; + }; + union { /* software */ + atomic64_t count; + struct hrtimer hrtimer; + }; + }; atomic64_t prev_count; u64 irq_period; atomic64_t period_left; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 68950a3..f9330d5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1395,7 +1395,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling) struct perf_counter *counter, *group_leader = sibling->group_leader; list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - perf_swcounter_update(counter); + counter->hw_ops->read(counter); perf_swcounter_store_irq(sibling, counter->hw_event.type); perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); } @@ -1404,8 +1404,6 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling) static void perf_swcounter_interrupt(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - perf_swcounter_save_and_restart(counter); - switch (counter->hw_event.record_type) { case PERF_RECORD_SIMPLE: break; @@ -1426,6 +1424,38 @@ static void perf_swcounter_interrupt(struct perf_counter *counter, wake_up(&counter->waitq); } +static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) +{ + struct perf_counter *counter; + struct pt_regs *regs; + + counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); + counter->hw_ops->read(counter); + + regs = get_irq_regs(); + /* + * In case we exclude kernel IPs or are somehow not in interrupt + * context, provide the next best thing, the user IP. + */ + if ((counter->hw_event.exclude_kernel || !regs) && + !counter->hw_event.exclude_user) + regs = task_pt_regs(current); + + if (regs) + perf_swcounter_interrupt(counter, 0, regs); + + hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); + + return HRTIMER_RESTART; +} + +static void perf_swcounter_overflow(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + perf_swcounter_save_and_restart(counter); + perf_swcounter_interrupt(counter, nmi, regs); +} + static int perf_swcounter_match(struct perf_counter *counter, enum hw_event_types event, struct pt_regs *regs) @@ -1448,13 +1478,20 @@ static int perf_swcounter_match(struct perf_counter *counter, return 1; } +static void perf_swcounter_add(struct perf_counter *counter, u64 nr, + int nmi, struct pt_regs *regs) +{ + int neg = atomic64_add_negative(nr, &counter->hw.count); + if (counter->hw.irq_period && !neg) + perf_swcounter_overflow(counter, nmi, regs); +} + static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, enum hw_event_types event, u64 nr, int nmi, struct pt_regs *regs) { struct perf_counter *counter; unsigned long flags; - int neg; if (list_empty(&ctx->counter_list)) return; @@ -1465,11 +1502,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, * XXX: make counter_list RCU safe */ list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (perf_swcounter_match(counter, event, regs)) { - neg = atomic64_add_negative(nr, &counter->hw.count); - if (counter->hw.irq_period && !neg) - perf_swcounter_interrupt(counter, nmi, regs); - } + if (perf_swcounter_match(counter, event, regs)) + perf_swcounter_add(counter, nr, nmi, regs); } spin_unlock_irqrestore(&ctx->lock, flags); @@ -1513,14 +1547,6 @@ static const struct hw_perf_counter_ops perf_ops_generic = { * Software counter: cpu wall time clock */ -static int cpu_clock_perf_counter_enable(struct perf_counter *counter) -{ - int cpu = raw_smp_processor_id(); - - atomic64_set(&counter->hw.prev_count, cpu_clock(cpu)); - return 0; -} - static void cpu_clock_perf_counter_update(struct perf_counter *counter) { int cpu = raw_smp_processor_id(); @@ -1533,8 +1559,26 @@ static void cpu_clock_perf_counter_update(struct perf_counter *counter) atomic64_add(now - prev, &counter->count); } +static int cpu_clock_perf_counter_enable(struct perf_counter *counter) +{ + struct hw_perf_counter *hwc = &counter->hw; + int cpu = raw_smp_processor_id(); + + atomic64_set(&hwc->prev_count, cpu_clock(cpu)); + if (hwc->irq_period) { + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swcounter_hrtimer; + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(hwc->irq_period), 0, + HRTIMER_MODE_REL, 0); + } + + return 0; +} + static void cpu_clock_perf_counter_disable(struct perf_counter *counter) { + hrtimer_cancel(&counter->hw.hrtimer); cpu_clock_perf_counter_update(counter); } @@ -1580,27 +1624,33 @@ static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now atomic64_add(delta, &counter->count); } -static void task_clock_perf_counter_read(struct perf_counter *counter) -{ - u64 now = task_clock_perf_counter_val(counter, 1); - - task_clock_perf_counter_update(counter, now); -} - static int task_clock_perf_counter_enable(struct perf_counter *counter) { - if (counter->prev_state <= PERF_COUNTER_STATE_OFF) - atomic64_set(&counter->hw.prev_count, - task_clock_perf_counter_val(counter, 0)); + struct hw_perf_counter *hwc = &counter->hw; + + atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0)); + if (hwc->irq_period) { + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swcounter_hrtimer; + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(hwc->irq_period), 0, + HRTIMER_MODE_REL, 0); + } return 0; } static void task_clock_perf_counter_disable(struct perf_counter *counter) { - u64 now = task_clock_perf_counter_val(counter, 0); + hrtimer_cancel(&counter->hw.hrtimer); + task_clock_perf_counter_update(counter, + task_clock_perf_counter_val(counter, 0)); +} - task_clock_perf_counter_update(counter, now); +static void task_clock_perf_counter_read(struct perf_counter *counter) +{ + task_clock_perf_counter_update(counter, + task_clock_perf_counter_val(counter, 1)); } static const struct hw_perf_counter_ops perf_ops_task_clock = { @@ -1729,16 +1779,12 @@ sw_perf_counter_init(struct perf_counter *counter) */ switch (counter->hw_event.type) { case PERF_COUNT_CPU_CLOCK: - if (!(counter->hw_event.exclude_user || - counter->hw_event.exclude_kernel || - counter->hw_event.exclude_hv)) - hw_ops = &perf_ops_cpu_clock; + hw_ops = &perf_ops_cpu_clock; + + if (hw_event->irq_period && hw_event->irq_period < 10000) + hw_event->irq_period = 10000; break; case PERF_COUNT_TASK_CLOCK: - if (counter->hw_event.exclude_user || - counter->hw_event.exclude_kernel || - counter->hw_event.exclude_hv) - break; /* * If the user instantiates this as a per-cpu counter, * use the cpu_clock counter instead. @@ -1747,6 +1793,9 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_task_clock; else hw_ops = &perf_ops_cpu_clock; + + if (hw_event->irq_period && hw_event->irq_period < 10000) + hw_event->irq_period = 10000; break; case PERF_COUNT_PAGE_FAULTS: case PERF_COUNT_PAGE_FAULTS_MIN: -- cgit v0.10.2 From 592903cdcbf606a838056bae6d03fc557806c914 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:36 +0100 Subject: perf_counter: add an event_list I noticed that the counter_list only includes top-level counters, thus perf_swcounter_event() will miss sw-counters in groups. Since perf_swcounter_event() also wants an RCU safe list, create a new event_list that includes all counters and uses RCU list ops and use call_rcu to free the counter structure. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index dfb4c7c..08c11a6 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -187,6 +187,7 @@ struct file; struct perf_counter { #ifdef CONFIG_PERF_COUNTERS struct list_head list_entry; + struct list_head event_entry; struct list_head sibling_list; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; @@ -220,6 +221,8 @@ struct perf_counter { struct perf_data *irqdata; struct perf_data *usrdata; struct perf_data data[2]; + + struct rcu_head rcu_head; #endif }; @@ -243,6 +246,7 @@ struct perf_counter_context { struct mutex mutex; struct list_head counter_list; + struct list_head event_list; int nr_counters; int nr_active; int is_active; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f9330d5..8d6ecfa 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -22,6 +22,7 @@ #include #include #include +#include /* * Each CPU has a list of per CPU counters: @@ -72,6 +73,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_add_tail(&counter->list_entry, &ctx->counter_list); else list_add_tail(&counter->list_entry, &group_leader->sibling_list); + + list_add_rcu(&counter->event_entry, &ctx->event_list); } static void @@ -80,6 +83,7 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) struct perf_counter *sibling, *tmp; list_del_init(&counter->list_entry); + list_del_rcu(&counter->event_entry); /* * If this was a group counter with sibling counters then @@ -1133,6 +1137,14 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) return ctx; } +static void free_counter_rcu(struct rcu_head *head) +{ + struct perf_counter *counter; + + counter = container_of(head, struct perf_counter, rcu_head); + kfree(counter); +} + /* * Called when the last reference to the file is gone. */ @@ -1151,7 +1163,7 @@ static int perf_release(struct inode *inode, struct file *file) mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); - kfree(counter); + call_rcu(&counter->rcu_head, free_counter_rcu); put_context(ctx); return 0; @@ -1491,22 +1503,16 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, int nmi, struct pt_regs *regs) { struct perf_counter *counter; - unsigned long flags; - if (list_empty(&ctx->counter_list)) + if (list_empty(&ctx->event_list)) return; - spin_lock_irqsave(&ctx->lock, flags); - - /* - * XXX: make counter_list RCU safe - */ - list_for_each_entry(counter, &ctx->counter_list, list_entry) { + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { if (perf_swcounter_match(counter, event, regs)) perf_swcounter_add(counter, nr, nmi, regs); } - - spin_unlock_irqrestore(&ctx->lock, flags); + rcu_read_unlock(); } void perf_swcounter_event(enum hw_event_types event, u64 nr, @@ -1846,6 +1852,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, mutex_init(&counter->mutex); INIT_LIST_HEAD(&counter->list_entry); + INIT_LIST_HEAD(&counter->event_entry); INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); @@ -1992,6 +1999,7 @@ __perf_counter_init_context(struct perf_counter_context *ctx, spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->counter_list); + INIT_LIST_HEAD(&ctx->event_list); ctx->task = task; } -- cgit v0.10.2 From 039fc91e064b81c2820ff16c304be5aba35fd126 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 16:43:47 +0100 Subject: perf_counter: fix hrtimer sampling Impact: fix deadlock with perfstat Fix for the perfstat fubar.. We cannot unconditionally call hrtimer_cancel() without ever having done hrtimer_init() on the thing. Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <1236959027.22447.149.camel@twins> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8d6ecfa..d6cc222 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1571,9 +1571,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter) int cpu = raw_smp_processor_id(); atomic64_set(&hwc->prev_count, cpu_clock(cpu)); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swcounter_hrtimer; if (hwc->irq_period) { - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(hwc->irq_period), 0, HRTIMER_MODE_REL, 0); @@ -1635,9 +1635,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0)); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swcounter_hrtimer; if (hwc->irq_period) { - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; __hrtimer_start_range_ns(&hwc->hrtimer, ns_to_ktime(hwc->irq_period), 0, HRTIMER_MODE_REL, 0); -- cgit v0.10.2 From 4e193bd4dfdc983d12969b51439b4a1fbaf2daad Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Sat, 14 Mar 2009 14:29:25 +0100 Subject: perf_counter: include missing header Impact: build fix In order to compile a kernel with performance counter patches, has to be included to provide the declaration of struct pt_regs *get_irq_regs(void); [ This bug was masked by unrelated x86 header file changes in the x86 tree, but occurs in the tip:perfcounters/core standalone tree. ] Signed-off-by: Tim Blechmann Orig-LKML-Reference: <20090314142925.49c29c17@thinkpad> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d6cc222..0018c5e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -24,6 +24,8 @@ #include #include +#include + /* * Each CPU has a list of per CPU counters: */ -- cgit v0.10.2 From 7bb497bd885eedd0f56dfe3cc1b5ff20710d33b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 18 Mar 2009 08:59:21 +0100 Subject: perf_counter: fix crash on perfmon v1 systems Impact: fix boot crash on Intel Perfmon Version 1 systems Intel Perfmon v1 does not support the global MSRs, nor does it offer the generalized MSR ranges. So support v2 and later CPUs only. Also mark pmc_ops as read-mostly - to avoid false cacheline sharing. Cc: Paul Mackerras Cc: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 22dab06..6cba9d4 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -57,12 +57,14 @@ struct pmc_x86_ops { int max_events; }; -static struct pmc_x86_ops *pmc_ops; +static struct pmc_x86_ops *pmc_ops __read_mostly; static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; +static __read_mostly int intel_perfmon_version; + /* * Intel PerfMon v3. Used on Core2 and later. */ @@ -613,7 +615,7 @@ void perf_counter_print_debug(void) cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { + if (intel_perfmon_version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); @@ -930,10 +932,10 @@ static struct pmc_x86_ops pmc_amd_ops = { static struct pmc_x86_ops *pmc_intel_init(void) { + union cpuid10_edx edx; union cpuid10_eax eax; - unsigned int ebx; unsigned int unused; - union cpuid10_edx edx; + unsigned int ebx; /* * Check whether the Architectural PerfMon supports @@ -943,8 +945,12 @@ static struct pmc_x86_ops *pmc_intel_init(void) if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) return NULL; + intel_perfmon_version = eax.split.version_id; + if (intel_perfmon_version < 2) + return NULL; + pr_info("Intel Performance Monitoring support detected.\n"); - pr_info("... version: %d\n", eax.split.version_id); + pr_info("... version: %d\n", intel_perfmon_version); pr_info("... bit width: %d\n", eax.split.bit_width); pr_info("... mask length: %d\n", eax.split.mask_length); -- cgit v0.10.2 From b6c5a71da1477d261bc36254fe1f20d32b57598d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 16 Mar 2009 21:00:00 +1100 Subject: perf_counter: abstract wakeup flag setting in core to fix powerpc build Impact: build fix for powerpc Commit bd753921015e7905 ("perf_counter: software counter event infrastructure") introduced a use of TIF_PERF_COUNTERS into the core perfcounter code. This breaks the build on powerpc because we use a flag in a per-cpu area to signal wakeups on powerpc rather than a thread_info flag, because the thread_info flags have to be manipulated with atomic operations and are thus slower than per-cpu flags. This fixes the by changing the core to use an abstracted set_perf_counter_pending() function, which is defined on x86 to set the TIF_PERF_COUNTERS flag and on powerpc to set the per-cpu flag (paca->perf_counter_pending). It changes the previous powerpc definition of set_perf_counter_pending to not take an argument and adds a clear_perf_counter_pending, so as to simplify the definition on x86. On x86, set_perf_counter_pending() is defined as a macro. Defining it as a static inline in arch/x86/include/asm/perf_counters.h causes compile failures because gets included early in , and the definitions of set_tsk_thread_flag etc. are therefore not available in . (On powerpc this problem is avoided by defining set_perf_counter_pending etc. in .) Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b43076f..cb32d57 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -142,10 +142,17 @@ static inline unsigned long get_perf_counter_pending(void) return x; } -static inline void set_perf_counter_pending(int x) +static inline void set_perf_counter_pending(void) { asm volatile("stb %0,%1(13)" : : - "r" (x), + "r" (1), + "i" (offsetof(struct paca_struct, perf_counter_pending))); +} + +static inline void clear_perf_counter_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), "i" (offsetof(struct paca_struct, perf_counter_pending))); } @@ -158,7 +165,8 @@ static inline unsigned long get_perf_counter_pending(void) return 0; } -static inline void set_perf_counter_pending(int x) {} +static inline void set_perf_counter_pending(void) {} +static inline void clear_perf_counter_pending(void) {} static inline void perf_counter_do_pending(void) {} #endif /* CONFIG_PERF_COUNTERS */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0d2e37c..469e963 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -104,13 +104,6 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -#ifdef CONFIG_PERF_COUNTERS -notrace void __weak perf_counter_do_pending(void) -{ - set_perf_counter_pending(0); -} -#endif - notrace void raw_local_irq_restore(unsigned long en) { /* @@ -142,8 +135,10 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) + if (get_perf_counter_pending()) { + clear_perf_counter_pending(); perf_counter_do_pending(); + } /* * if (get_paca()->hard_enabled) return; diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0e33d27..5008762 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -653,7 +653,6 @@ void perf_counter_do_pending(void) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; - set_perf_counter_pending(0); for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter && counter->wakeup_pending) { @@ -811,7 +810,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) perf_counter_do_pending(); irq_exit(); } else { - set_perf_counter_pending(1); + set_perf_counter_pending(); } } } diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 2e08ed7..1662043 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +#define set_perf_counter_pending() \ + set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(int nmi); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0018c5e..b39456a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1433,7 +1433,7 @@ static void perf_swcounter_interrupt(struct perf_counter *counter, if (nmi) { counter->wakeup_pending = 1; - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); + set_perf_counter_pending(); } else wake_up(&counter->waitq); } -- cgit v0.10.2 From 01ef09d9ffb5ce9f8d62d1e5206da3d5ca612acc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:11 +0100 Subject: perf_counter: fix uninitialized usage of event_list Impact: fix boot crash When doing the generic context switch event I ran into some early boot hangs, which were caused by inf func recursion (event, fault, event, fault). I eventually tracked it down to event_list not being initialized at the time of the first event. Fix this. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.195392657@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 219748d..ca226a9 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -124,6 +124,8 @@ extern struct cred init_cred; # define INIT_PERF_COUNTERS(tsk) \ .perf_counter_ctx.counter_list = \ LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ + .perf_counter_ctx.event_list = \ + LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ .perf_counter_ctx.lock = \ __SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), #else diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b39456a..4c4e9eb 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1506,7 +1506,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, { struct perf_counter *counter; - if (list_empty(&ctx->event_list)) + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) return; rcu_read_lock(); -- cgit v0.10.2 From 4a0deca657f3dbb8a707b5dc8f173beec01e7ed2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:12 +0100 Subject: perf_counter: generic context switch event Impact: cleanup Use the generic software events for context switches. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.283522645@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 75b2fc5..7ed41f7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -138,7 +138,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern u64 cpu_nr_switches(int cpu); extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4c4e9eb..99d5930 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -710,10 +710,13 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) { struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); struct perf_counter_context *ctx = &task->perf_counter_ctx; + struct pt_regs *regs; if (likely(!cpuctx->task_ctx)) return; + regs = task_pt_regs(task); + perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); __perf_counter_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; @@ -1668,58 +1671,6 @@ static const struct hw_perf_counter_ops perf_ops_task_clock = { }; /* - * Software counter: context switches - */ - -static u64 get_context_switches(struct perf_counter *counter) -{ - struct task_struct *curr = counter->ctx->task; - - if (curr) - return curr->nvcsw + curr->nivcsw; - return cpu_nr_switches(smp_processor_id()); -} - -static void context_switches_perf_counter_update(struct perf_counter *counter) -{ - u64 prev, now; - s64 delta; - - prev = atomic64_read(&counter->hw.prev_count); - now = get_context_switches(counter); - - atomic64_set(&counter->hw.prev_count, now); - - delta = now - prev; - - atomic64_add(delta, &counter->count); -} - -static void context_switches_perf_counter_read(struct perf_counter *counter) -{ - context_switches_perf_counter_update(counter); -} - -static int context_switches_perf_counter_enable(struct perf_counter *counter) -{ - if (counter->prev_state <= PERF_COUNTER_STATE_OFF) - atomic64_set(&counter->hw.prev_count, - get_context_switches(counter)); - return 0; -} - -static void context_switches_perf_counter_disable(struct perf_counter *counter) -{ - context_switches_perf_counter_update(counter); -} - -static const struct hw_perf_counter_ops perf_ops_context_switches = { - .enable = context_switches_perf_counter_enable, - .disable = context_switches_perf_counter_disable, - .read = context_switches_perf_counter_read, -}; - -/* * Software counter: cpu migrations */ @@ -1808,11 +1759,8 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_PAGE_FAULTS: case PERF_COUNT_PAGE_FAULTS_MIN: case PERF_COUNT_PAGE_FAULTS_MAJ: - hw_ops = &perf_ops_generic; - break; case PERF_COUNT_CONTEXT_SWITCHES: - if (!counter->hw_event.exclude_kernel) - hw_ops = &perf_ops_context_switches; + hw_ops = &perf_ops_generic; break; case PERF_COUNT_CPU_MIGRATIONS: if (!counter->hw_event.exclude_kernel) diff --git a/kernel/sched.c b/kernel/sched.c index 39e7086..f76e3c0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2900,14 +2900,8 @@ unsigned long nr_active(void) /* * Externally visible per-cpu scheduler statistics: - * cpu_nr_switches(cpu) - number of context switches on that cpu * cpu_nr_migrations(cpu) - number of migrations into that cpu */ -u64 cpu_nr_switches(int cpu) -{ - return cpu_rq(cpu)->nr_switches; -} - u64 cpu_nr_migrations(int cpu) { return cpu_rq(cpu)->nr_migrations_in; -- cgit v0.10.2 From f16009527595ee562308653bc3d0039166d2ab15 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:16 +0100 Subject: perf_counter: fix up counter free paths Impact: fix crash during perfcounters use I found another counter free path, create a free_counter() call to accomodate generic tear-down. Fixes an RCU bug. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.652078652@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 99d5930..97f891f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1150,6 +1150,11 @@ static void free_counter_rcu(struct rcu_head *head) kfree(counter); } +static void free_counter(struct perf_counter *counter) +{ + call_rcu(&counter->rcu_head, free_counter_rcu); +} + /* * Called when the last reference to the file is gone. */ @@ -1168,7 +1173,7 @@ static int perf_release(struct inode *inode, struct file *file) mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); - call_rcu(&counter->rcu_head, free_counter_rcu); + free_counter(counter); put_context(ctx); return 0; @@ -2128,10 +2133,10 @@ __perf_counter_exit_task(struct task_struct *child, list_entry) { if (sub->parent) { sync_child_counter(sub, sub->parent); - kfree(sub); + free_counter(sub); } } - kfree(child_counter); + free_counter(child_counter); } } -- cgit v0.10.2 From e077df4f439681e43f0db8255b2d215b342ebdc6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:17 +0100 Subject: perf_counter: hook up the tracepoint events Impact: new perfcounters feature Enable usage of tracepoints as perf counter events. tracepoint event ids can be found in /debug/tracing/event/*/*/id and (for now) are represented as -65536+id in the type field. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.744044174@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 08c11a6..065984c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -53,6 +53,8 @@ enum hw_event_types { PERF_COUNT_PAGE_FAULTS_MAJ = -7, PERF_SW_EVENTS_MIN = -8, + + PERF_TP_EVENTS_MIN = -65536 }; /* @@ -222,6 +224,7 @@ struct perf_counter { struct perf_data *usrdata; struct perf_data data[2]; + void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; #endif }; diff --git a/init/Kconfig b/init/Kconfig index 38a2ecd..4f64714 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -947,6 +947,11 @@ config PERF_COUNTERS Say Y if unsure. +config EVENT_PROFILE + bool "Tracepoint profile sources" + depends on PERF_COUNTERS && EVENT_TRACER + default y + endmenu config VM_EVENT_COUNTERS diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 97f891f..0bbe3e4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1152,6 +1152,9 @@ static void free_counter_rcu(struct rcu_head *head) static void free_counter(struct perf_counter *counter) { + if (counter->destroy) + counter->destroy(counter); + call_rcu(&counter->rcu_head, free_counter_rcu); } @@ -1727,6 +1730,45 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { .read = cpu_migrations_perf_counter_read, }; +#ifdef CONFIG_EVENT_PROFILE +void perf_tpcounter_event(int event_id) +{ + perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1, + task_pt_regs(current)); +} + +extern int ftrace_profile_enable(int); +extern void ftrace_profile_disable(int); + +static void tp_perf_counter_destroy(struct perf_counter *counter) +{ + int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; + + ftrace_profile_disable(event_id); +} + +static const struct hw_perf_counter_ops * +tp_perf_counter_init(struct perf_counter *counter) +{ + int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; + int ret; + + ret = ftrace_profile_enable(event_id); + if (ret) + return NULL; + + counter->destroy = tp_perf_counter_destroy; + + return &perf_ops_generic; +} +#else +static const struct hw_perf_counter_ops * +tp_perf_counter_init(struct perf_counter *counter) +{ + return NULL; +} +#endif + static const struct hw_perf_counter_ops * sw_perf_counter_init(struct perf_counter *counter) { @@ -1772,6 +1814,7 @@ sw_perf_counter_init(struct perf_counter *counter) hw_ops = &perf_ops_cpu_migrations; break; default: + hw_ops = tp_perf_counter_init(counter); break; } -- cgit v0.10.2 From b8e83514b64577b48bfb794fe85fcde40a9343ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:18 +0100 Subject: perf_counter: revamp syscall input ABI Impact: modify ABI The hardware/software classification in hw_event->type became a little strained due to the addition of tracepoint tracing. Instead split up the field and provide a type field to explicitly specify the counter type, while using the event_id field to specify which event to use. Raw counters still work as before, only the raw config now goes into raw_event. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.836807573@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 5008762..26f69dc 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,7 +602,7 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - ev = counter->hw_event.type; + ev = counter->hw_event.event_id; if (!counter->hw_event.raw) { if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) @@ -692,7 +692,7 @@ static void perf_handle_group(struct perf_counter *counter) list_for_each_entry(sub, &leader->sibling_list, list_entry) { if (sub != counter) sub->hw_ops->read(sub); - perf_store_irq_data(counter, sub->hw_event.type); + perf_store_irq_data(counter, sub->hw_event.event_config); perf_store_irq_data(counter, atomic64_read(&sub->count)); } } diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6cba9d4..d844ae4 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * Raw event type provide the config in the event structure */ - if (hw_event->raw) { - hwc->config |= pmc_ops->raw_event(hw_event->type); + if (hw_event->raw_type) { + hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id); } else { - if (hw_event->type >= pmc_ops->max_events) + if (hw_event->event_id >= pmc_ops->max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= pmc_ops->event_map(hw_event->type); + hwc->config |= pmc_ops->event_map(hw_event->event_id); } counter->wakeup_pending = 0; @@ -715,7 +715,7 @@ perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); - perf_store_irq_data(sibling, counter->hw_event.type); + perf_store_irq_data(sibling, counter->hw_event.event_config); perf_store_irq_data(sibling, atomic64_read(&counter->count)); } } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 065984c..8f93949 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -21,56 +21,81 @@ */ /* - * Generalized performance counter event types, used by the hw_event.type - * parameter of the sys_perf_counter_open() syscall: + * hw_event.type */ -enum hw_event_types { +enum perf_event_types { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + /* - * Common hardware events, generalized by the kernel: + * available TYPE space, raw is the max value. */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - PERF_HW_EVENTS_MAX = 7, + PERF_TYPE_RAW = 128, +}; +/* + * Generalized performance counter event types, used by the hw_event.event_id + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_ids { /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): + * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, - PERF_COUNT_CPU_MIGRATIONS = -5, - PERF_COUNT_PAGE_FAULTS_MIN = -6, - PERF_COUNT_PAGE_FAULTS_MAJ = -7, - - PERF_SW_EVENTS_MIN = -8, + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, + + PERF_HW_EVENTS_MAX = 7, +}; - PERF_TP_EVENTS_MIN = -65536 +/* + * Special "software" counters provided by the kernel, even if the hardware + * does not support performance counters. These counters measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum sw_event_ids { + PERF_COUNT_CPU_CLOCK = 0, + PERF_COUNT_TASK_CLOCK = 1, + PERF_COUNT_PAGE_FAULTS = 2, + PERF_COUNT_CONTEXT_SWITCHES = 3, + PERF_COUNT_CPU_MIGRATIONS = 4, + PERF_COUNT_PAGE_FAULTS_MIN = 5, + PERF_COUNT_PAGE_FAULTS_MAJ = 6, + + PERF_SW_EVENTS_MAX = 7, }; /* * IRQ-notification data record type: */ enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, }; /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - __s64 type; + union { + struct { + __u64 event_id : 56, + type : 8; + }; + struct { + __u64 raw_event_id : 63, + raw_type : 1; + }; + __u64 event_config; + }; __u64 irq_period; __u64 record_type; @@ -78,7 +103,6 @@ struct perf_counter_hw_event { __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ inherit : 1, /* children inherit it */ pinned : 1, /* must always be on PMU */ exclusive : 1, /* only group on PMU */ @@ -87,7 +111,7 @@ struct perf_counter_hw_event { exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - __reserved_1 : 54; + __reserved_1 : 55; __u32 extra_config_len; __u32 __reserved_4; @@ -298,10 +322,11 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, */ static inline int is_software_counter(struct perf_counter *counter) { - return !counter->hw_event.raw && counter->hw_event.type < 0; + return !counter->hw_event.raw_type && + counter->hw_event.type != PERF_TYPE_HARDWARE; } -extern void perf_swcounter_event(enum hw_event_types, u64, int, struct pt_regs *); +extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); #else static inline void @@ -320,7 +345,7 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(enum hw_event_types event, u64 nr, +static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0bbe3e4..68a56a6 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1395,12 +1395,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter) atomic64_set(&hwc->count, -left); } -static void perf_swcounter_save_and_restart(struct perf_counter *counter) -{ - perf_swcounter_update(counter); - perf_swcounter_set_period(counter); -} - static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data) { struct perf_data *irqdata = counter->irqdata; @@ -1421,7 +1415,7 @@ static void perf_swcounter_handle_group(struct perf_counter *sibling) list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { counter->hw_ops->read(counter); - perf_swcounter_store_irq(sibling, counter->hw_event.type); + perf_swcounter_store_irq(sibling, counter->hw_event.event_config); perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); } } @@ -1477,21 +1471,25 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) static void perf_swcounter_overflow(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - perf_swcounter_save_and_restart(counter); + perf_swcounter_update(counter); + perf_swcounter_set_period(counter); perf_swcounter_interrupt(counter, nmi, regs); } static int perf_swcounter_match(struct perf_counter *counter, - enum hw_event_types event, - struct pt_regs *regs) + enum perf_event_types type, + u32 event, struct pt_regs *regs) { if (counter->state != PERF_COUNTER_STATE_ACTIVE) return 0; - if (counter->hw_event.raw) + if (counter->hw_event.raw_type) + return 0; + + if (counter->hw_event.type != type) return 0; - if (counter->hw_event.type != event) + if (counter->hw_event.event_id != event) return 0; if (counter->hw_event.exclude_user && user_mode(regs)) @@ -1512,8 +1510,8 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr, } static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, - enum hw_event_types event, u64 nr, - int nmi, struct pt_regs *regs) + enum perf_event_types type, u32 event, + u64 nr, int nmi, struct pt_regs *regs) { struct perf_counter *counter; @@ -1522,24 +1520,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_swcounter_match(counter, event, regs)) + if (perf_swcounter_match(counter, type, event, regs)) perf_swcounter_add(counter, nr, nmi, regs); } rcu_read_unlock(); } -void perf_swcounter_event(enum hw_event_types event, u64 nr, - int nmi, struct pt_regs *regs) +static void __perf_swcounter_event(enum perf_event_types type, u32 event, + u64 nr, int nmi, struct pt_regs *regs) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - perf_swcounter_ctx_event(&cpuctx->ctx, event, nr, nmi, regs); - if (cpuctx->task_ctx) - perf_swcounter_ctx_event(cpuctx->task_ctx, event, nr, nmi, regs); + perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); + if (cpuctx->task_ctx) { + perf_swcounter_ctx_event(cpuctx->task_ctx, type, event, + nr, nmi, regs); + } put_cpu_var(perf_cpu_context); } +void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) +{ + __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs); +} + static void perf_swcounter_read(struct perf_counter *counter) { perf_swcounter_update(counter); @@ -1733,8 +1738,12 @@ static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { #ifdef CONFIG_EVENT_PROFILE void perf_tpcounter_event(int event_id) { - perf_swcounter_event(PERF_TP_EVENTS_MIN + event_id, 1, 1, - task_pt_regs(current)); + struct pt_regs *regs = get_irq_regs(); + + if (!regs) + regs = task_pt_regs(current); + + __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs); } extern int ftrace_profile_enable(int); @@ -1742,15 +1751,13 @@ extern void ftrace_profile_disable(int); static void tp_perf_counter_destroy(struct perf_counter *counter) { - int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; - - ftrace_profile_disable(event_id); + ftrace_profile_disable(counter->hw_event.event_id); } static const struct hw_perf_counter_ops * tp_perf_counter_init(struct perf_counter *counter) { - int event_id = counter->hw_event.type - PERF_TP_EVENTS_MIN; + int event_id = counter->hw_event.event_id; int ret; ret = ftrace_profile_enable(event_id); @@ -1758,6 +1765,7 @@ tp_perf_counter_init(struct perf_counter *counter) return NULL; counter->destroy = tp_perf_counter_destroy; + counter->hw.irq_period = counter->hw_event.irq_period; return &perf_ops_generic; } @@ -1783,7 +1791,7 @@ sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (counter->hw_event.type) { + switch (counter->hw_event.event_id) { case PERF_COUNT_CPU_CLOCK: hw_ops = &perf_ops_cpu_clock; @@ -1813,9 +1821,6 @@ sw_perf_counter_init(struct perf_counter *counter) if (!counter->hw_event.exclude_kernel) hw_ops = &perf_ops_cpu_migrations; break; - default: - hw_ops = tp_perf_counter_init(counter); - break; } if (hw_ops) @@ -1870,10 +1875,22 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->state = PERF_COUNTER_STATE_OFF; hw_ops = NULL; - if (!hw_event->raw && hw_event->type < 0) - hw_ops = sw_perf_counter_init(counter); - else + + if (hw_event->raw_type) + hw_ops = hw_perf_counter_init(counter); + else switch (hw_event->type) { + case PERF_TYPE_HARDWARE: hw_ops = hw_perf_counter_init(counter); + break; + + case PERF_TYPE_SOFTWARE: + hw_ops = sw_perf_counter_init(counter); + break; + + case PERF_TYPE_TRACEPOINT: + hw_ops = tp_perf_counter_init(counter); + break; + } if (!hw_ops) { kfree(counter); -- cgit v0.10.2 From 0322cd6ec504b0bf08ca7b2c3d7f43bda37d79c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Mar 2009 20:26:19 +0100 Subject: perf_counter: unify irq output code Impact: cleanup Having 3 slightly different copies of the same code around does nobody any good. First step in revamping the output format. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Orig-LKML-Reference: <20090319194233.929962222@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 26f69dc..88b72eb 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -663,41 +663,6 @@ void perf_counter_do_pending(void) } /* - * Record data for an irq counter. - * This function was lifted from the x86 code; maybe it should - * go in the core? - */ -static void perf_store_irq_data(struct perf_counter *counter, u64 data) -{ - struct perf_data *irqdata = counter->irqdata; - - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; - - *p = data; - irqdata->len += sizeof(u64); - } -} - -/* - * Record all the values of the counters in a group - */ -static void perf_handle_group(struct perf_counter *counter) -{ - struct perf_counter *leader, *sub; - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->hw_ops->read(sub); - perf_store_irq_data(counter, sub->hw_event.event_config); - perf_store_irq_data(counter, atomic64_read(&sub->count)); - } -} - -/* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled * here so there is no possibility of being interrupted. @@ -736,20 +701,8 @@ static void record_and_restart(struct perf_counter *counter, long val, /* * Finally record data if requested. */ - if (record) { - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - break; - case PERF_RECORD_IRQ: - perf_store_irq_data(counter, instruction_pointer(regs)); - counter->wakeup_pending = 1; - break; - case PERF_RECORD_GROUP: - perf_handle_group(counter); - counter->wakeup_pending = 1; - break; - } - } + if (record) + perf_counter_output(counter, 1, regs); } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d844ae4..902282d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -674,20 +674,6 @@ static void pmc_generic_disable(struct perf_counter *counter) x86_perf_counter_update(counter, hwc, idx); } -static void perf_store_irq_data(struct perf_counter *counter, u64 data) -{ - struct perf_data *irqdata = counter->irqdata; - - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; - - *p = data; - irqdata->len += sizeof(u64); - } -} - /* * Save and restart an expired counter. Called by NMI contexts, * so it has to be careful about preempting normal counter ops: @@ -704,22 +690,6 @@ static void perf_save_and_restart(struct perf_counter *counter) __pmc_generic_enable(counter, hwc, idx); } -static void -perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown) -{ - struct perf_counter *counter, *group_leader = sibling->group_leader; - - /* - * Store sibling timestamps (if any): - */ - list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - - x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); - perf_store_irq_data(sibling, counter->hw_event.event_config); - perf_store_irq_data(sibling, atomic64_read(&counter->count)); - } -} - /* * Maximum interrupt frequency of 100KHz per CPU */ @@ -754,28 +724,7 @@ again: continue; perf_save_and_restart(counter); - - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - continue; - case PERF_RECORD_IRQ: - perf_store_irq_data(counter, instruction_pointer(regs)); - break; - case PERF_RECORD_GROUP: - perf_handle_group(counter, &status, &ack); - break; - } - /* - * From NMI context we cannot call into the scheduler to - * do a task wakeup - but we mark these generic as - * wakeup_pending and initate a wakeup callback: - */ - if (nmi) { - counter->wakeup_pending = 1; - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); - } else { - wake_up(&counter->waitq); - } + perf_counter_output(counter, nmi, regs); } hw_perf_ack_status(ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8f93949..a4b76c0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -317,6 +317,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +extern void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs); /* * Return 1 for a software counter, 0 for a hardware counter */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 68a56a6..f054b8c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1354,6 +1354,60 @@ static const struct file_operations perf_fops = { }; /* + * Output + */ + +static void perf_counter_store_irq(struct perf_counter *counter, u64 data) +{ + struct perf_data *irqdata = counter->irqdata; + + if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { + irqdata->overrun++; + } else { + u64 *p = (u64 *) &irqdata->data[irqdata->len]; + + *p = data; + irqdata->len += sizeof(u64); + } +} + +static void perf_counter_handle_group(struct perf_counter *counter) +{ + struct perf_counter *leader, *sub; + + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); + perf_counter_store_irq(counter, sub->hw_event.event_config); + perf_counter_store_irq(counter, atomic64_read(&sub->count)); + } +} + +void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + switch (counter->hw_event.record_type) { + case PERF_RECORD_SIMPLE: + return; + + case PERF_RECORD_IRQ: + perf_counter_store_irq(counter, instruction_pointer(regs)); + break; + + case PERF_RECORD_GROUP: + perf_counter_handle_group(counter); + break; + } + + if (nmi) { + counter->wakeup_pending = 1; + set_perf_counter_pending(); + } else + wake_up(&counter->waitq); +} + +/* * Generic software counter infrastructure */ @@ -1395,54 +1449,6 @@ static void perf_swcounter_set_period(struct perf_counter *counter) atomic64_set(&hwc->count, -left); } -static void perf_swcounter_store_irq(struct perf_counter *counter, u64 data) -{ - struct perf_data *irqdata = counter->irqdata; - - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; - - *p = data; - irqdata->len += sizeof(u64); - } -} - -static void perf_swcounter_handle_group(struct perf_counter *sibling) -{ - struct perf_counter *counter, *group_leader = sibling->group_leader; - - list_for_each_entry(counter, &group_leader->sibling_list, list_entry) { - counter->hw_ops->read(counter); - perf_swcounter_store_irq(sibling, counter->hw_event.event_config); - perf_swcounter_store_irq(sibling, atomic64_read(&counter->count)); - } -} - -static void perf_swcounter_interrupt(struct perf_counter *counter, - int nmi, struct pt_regs *regs) -{ - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - break; - - case PERF_RECORD_IRQ: - perf_swcounter_store_irq(counter, instruction_pointer(regs)); - break; - - case PERF_RECORD_GROUP: - perf_swcounter_handle_group(counter); - break; - } - - if (nmi) { - counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&counter->waitq); -} - static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) { struct perf_counter *counter; @@ -1461,7 +1467,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) regs = task_pt_regs(current); if (regs) - perf_swcounter_interrupt(counter, 0, regs); + perf_counter_output(counter, 0, regs); hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); @@ -1473,7 +1479,7 @@ static void perf_swcounter_overflow(struct perf_counter *counter, { perf_swcounter_update(counter); perf_swcounter_set_period(counter); - perf_swcounter_interrupt(counter, nmi, regs); + perf_counter_output(counter, nmi, regs); } static int perf_swcounter_match(struct perf_counter *counter, -- cgit v0.10.2 From db4fb5acf20295063d1d5105e67724eb51440207 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 19 Mar 2009 20:26:20 +0100 Subject: perf_counter: powerpc: clean up perc_counter_interrupt Impact: cleanup This updates the powerpc perf_counter_interrupt following on from the "perf_counter: unify irq output code" patch. Since we now use the generic perf_counter_output code, which sets the perf_counter_pending flag directly, we no longer need the need_wakeup variable. This removes need_wakeup and makes perf_counter_interrupt use get_perf_counter_pending() instead. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Orig-LKML-Reference: <20090319194234.024464535@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 88b72eb..830ca9c 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -723,8 +723,6 @@ static void perf_counter_interrupt(struct pt_regs *regs) /* counter has overflowed */ found = 1; record_and_restart(counter, val, regs); - if (counter->wakeup_pending) - need_wakeup = 1; } } @@ -754,17 +752,14 @@ static void perf_counter_interrupt(struct pt_regs *regs) /* * If we need a wakeup, check whether interrupts were soft-enabled * when we took the interrupt. If they were, we can wake stuff up - * immediately; otherwise we'll have to set a flag and do the - * wakeup when interrupts get soft-enabled. + * immediately; otherwise we'll have do the wakeup when interrupts + * get soft-enabled. */ - if (need_wakeup) { - if (regs->softe) { - irq_enter(); - perf_counter_do_pending(); - irq_exit(); - } else { - set_perf_counter_pending(); - } + if (get_perf_counter_pending() && regs->softe) { + irq_enter(); + clear_perf_counter_pending(); + perf_counter_do_pending(); + irq_exit(); } } -- cgit v0.10.2 From 9aaa131a279834dff75c290c91f0058f62d72d46 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 21 Mar 2009 15:31:47 +1100 Subject: perf_counter: fix type/event_id layout on big-endian systems Impact: build fix for powerpc Commit db3a944aca35ae61 ("perf_counter: revamp syscall input ABI") expanded the hw_event.type field into a union of structs containing bitfields. In particular it introduced a type field and a raw_type field, with the intention that the 1-bit raw_type field should overlay the most-significant bit of the 8-bit type field, and in fact perf_counter_alloc() now assumes that (or at least, assumes that raw_type doesn't overlay any of the bits that are 1 in the values of PERF_TYPE_{HARDWARE,SOFTWARE,TRACEPOINT}). Unfortunately this is not true on big-endian systems such as PowerPC, where bitfields are laid out from left to right, i.e. from most significant bit to least significant. This means that setting hw_event.type = PERF_TYPE_SOFTWARE will set hw_event.raw_type to 1. This fixes it by making the layout depend on whether or not __BIG_ENDIAN_BITFIELD is defined. It's a bit ugly, but that's what we get for using bitfields in a user/kernel ABI. Also, that commit didn't fix up some places in arch/powerpc/kernel/ perf_counter.c where hw_event.raw and hw_event.event_id were used. This fixes them too. Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 830ca9c..6413d9c 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,12 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - ev = counter->hw_event.event_id; - if (!counter->hw_event.raw) { - if (ev >= ppmu->n_generic || - ppmu->generic_events[ev] == 0) + if (!counter->hw_event.raw_type) { + ev = counter->hw_event.event_id; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return NULL; ev = ppmu->generic_events[ev]; + } else { + ev = counter->hw_event.raw_event_id; } counter->hw.config_base = ev; counter->hw.idx = 0; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a4b76c0..98f5990 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -15,6 +15,7 @@ #include #include +#include /* * User-space ABI bits: @@ -86,6 +87,7 @@ enum perf_counter_record_type { */ struct perf_counter_hw_event { union { +#ifndef __BIG_ENDIAN_BITFIELD struct { __u64 event_id : 56, type : 8; @@ -94,6 +96,16 @@ struct perf_counter_hw_event { __u64 raw_event_id : 63, raw_type : 1; }; +#else + struct { + __u64 type : 8, + event_id : 56; + }; + struct { + __u64 raw_type : 1, + raw_event_id : 63; + }; +#endif /* __BIT_ENDIAN_BITFIELD */ __u64 event_config; }; -- cgit v0.10.2 From 6f9f791eb53b56097cd311a1a5517a8e89bdaf35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Mar 2009 21:26:52 +0100 Subject: perf_counter: create Documentation/perf_counter/ and move perfcounters.txt there We'll have more files in that directory, prepare for that. Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf-counters.txt b/Documentation/perf-counters.txt deleted file mode 100644 index fddd321..0000000 --- a/Documentation/perf-counters.txt +++ /dev/null @@ -1,147 +0,0 @@ - -Performance Counters for Linux ------------------------------- - -Performance counters are special hardware registers available on most modern -CPUs. These registers count the number of certain types of hw events: such -as instructions executed, cachemisses suffered, or branches mis-predicted - -without slowing down the kernel or applications. These registers can also -trigger interrupts when a threshold number of events have passed - and can -thus be used to profile the code that runs on that CPU. - -The Linux Performance Counter subsystem provides an abstraction of these -hardware capabilities. It provides per task and per CPU counters, counter -groups, and it provides event capabilities on top of those. - -Performance counters are accessed via special file descriptors. -There's one file descriptor per virtual counter used. - -The special file descriptor is opened via the perf_counter_open() -system call: - - int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, - pid_t pid, int cpu, int group_fd); - -The syscall returns the new fd. The fd can be used via the normal -VFS system calls: read() can be used to read the counter, fcntl() -can be used to set the blocking mode, etc. - -Multiple counters can be kept open at a time, and the counters -can be poll()ed. - -When creating a new counter fd, 'perf_counter_hw_event' is: - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_hw_event { - s64 type; - - u64 irq_period; - u32 record_type; - - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - __reserved_1 : 29; - - u64 __reserved_2; -}; - -/* - * Generalized performance counter event types, used by the hw_event.type - * parameter of the sys_perf_counter_open() syscall: - */ -enum hw_event_types { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - - /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): - */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - /* - * Future software events: - */ - /* PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, */ -}; - -These are standardized types of events that work uniformly on all CPUs -that implements Performance Counters support under Linux. If a CPU is -not able to count branch-misses, then the system call will return --EINVAL. - -More hw_event_types are supported as well, but they are CPU -specific and are enumerated via /sys on a per CPU basis. Raw hw event -types can be passed in under hw_event.type if hw_event.raw is 1. -For example, to count "External bus cycles while bus lock signal asserted" -events on Intel Core CPUs, pass in a 0x4064 event type value and set -hw_event.raw to 1. - -'record_type' is the type of data that a read() will provide for the -counter, and it can be one of: - -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - -a "simple" counter is one that counts hardware events and allows -them to be read out into a u64 count value. (read() returns 8 on -a successful read of a simple counter.) - -An "irq" counter is one that will also provide an IRQ context information: -the IP of the interrupted context. In this case read() will return -the 8-byte counter value, plus the Instruction Pointer address of the -interrupted context. - -The parameter 'hw_event_period' is the number of events before waking up -a read() that is blocked on a counter fd. Zero value means a non-blocking -counter. - -The 'pid' parameter allows the counter to be specific to a task: - - pid == 0: if the pid parameter is zero, the counter is attached to the - current task. - - pid > 0: the counter is attached to a specific task (if the current task - has sufficient privilege to do so) - - pid < 0: all tasks are counted (per cpu counters) - -The 'cpu' parameter allows a counter to be made specific to a full -CPU: - - cpu >= 0: the counter is restricted to a specific CPU - cpu == -1: the counter counts on all CPUs - -(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) - -A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts -events of that task and 'follows' that task to whatever CPU the task -gets schedule to. Per task counters can be created by any user, for -their own tasks. - -A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts -all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. - -Group counters are created by passing in a group_fd of another counter. -Groups are scheduled at once and can be used with PERF_RECORD_GROUP -to record multi-dimensional timestamps. - diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt new file mode 100644 index 0000000..fddd321 --- /dev/null +++ b/Documentation/perf_counter/design.txt @@ -0,0 +1,147 @@ + +Performance Counters for Linux +------------------------------ + +Performance counters are special hardware registers available on most modern +CPUs. These registers count the number of certain types of hw events: such +as instructions executed, cachemisses suffered, or branches mis-predicted - +without slowing down the kernel or applications. These registers can also +trigger interrupts when a threshold number of events have passed - and can +thus be used to profile the code that runs on that CPU. + +The Linux Performance Counter subsystem provides an abstraction of these +hardware capabilities. It provides per task and per CPU counters, counter +groups, and it provides event capabilities on top of those. + +Performance counters are accessed via special file descriptors. +There's one file descriptor per virtual counter used. + +The special file descriptor is opened via the perf_counter_open() +system call: + + int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, + pid_t pid, int cpu, int group_fd); + +The syscall returns the new fd. The fd can be used via the normal +VFS system calls: read() can be used to read the counter, fcntl() +can be used to set the blocking mode, etc. + +Multiple counters can be kept open at a time, and the counters +can be poll()ed. + +When creating a new counter fd, 'perf_counter_hw_event' is: + +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + s64 type; + + u64 irq_period; + u32 record_type; + + u32 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + __reserved_1 : 29; + + u64 __reserved_2; +}; + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + /* + * Future software events: + */ + /* PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, */ +}; + +These are standardized types of events that work uniformly on all CPUs +that implements Performance Counters support under Linux. If a CPU is +not able to count branch-misses, then the system call will return +-EINVAL. + +More hw_event_types are supported as well, but they are CPU +specific and are enumerated via /sys on a per CPU basis. Raw hw event +types can be passed in under hw_event.type if hw_event.raw is 1. +For example, to count "External bus cycles while bus lock signal asserted" +events on Intel Core CPUs, pass in a 0x4064 event type value and set +hw_event.raw to 1. + +'record_type' is the type of data that a read() will provide for the +counter, and it can be one of: + +/* + * IRQ-notification data record type: + */ +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, +}; + +a "simple" counter is one that counts hardware events and allows +them to be read out into a u64 count value. (read() returns 8 on +a successful read of a simple counter.) + +An "irq" counter is one that will also provide an IRQ context information: +the IP of the interrupted context. In this case read() will return +the 8-byte counter value, plus the Instruction Pointer address of the +interrupted context. + +The parameter 'hw_event_period' is the number of events before waking up +a read() that is blocked on a counter fd. Zero value means a non-blocking +counter. + +The 'pid' parameter allows the counter to be specific to a task: + + pid == 0: if the pid parameter is zero, the counter is attached to the + current task. + + pid > 0: the counter is attached to a specific task (if the current task + has sufficient privilege to do so) + + pid < 0: all tasks are counted (per cpu counters) + +The 'cpu' parameter allows a counter to be made specific to a full +CPU: + + cpu >= 0: the counter is restricted to a specific CPU + cpu == -1: the counter counts on all CPUs + +(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) + +A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts +events of that task and 'follows' that task to whatever CPU the task +gets schedule to. Per task counters can be created by any user, for +their own tasks. + +A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts +all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. + +Group counters are created by passing in a group_fd of another counter. +Groups are scheduled at once and can be used with PERF_RECORD_GROUP +to record multi-dimensional timestamps. + -- cgit v0.10.2 From e0143bad9dbf2a8fad4c5430562bceba196b66ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Mar 2009 21:29:59 +0100 Subject: perf_counter: add sample user-space to Documentation/perf_counter/ Initial version of kerneltop.c and perfstat.c. Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile new file mode 100644 index 0000000..b457497 --- /dev/null +++ b/Documentation/perf_counter/Makefile @@ -0,0 +1,12 @@ +BINS = kerneltop perfstat + +all: $(BINS) + +kerneltop: kerneltop.c perfcounters.h + cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o $@ $< + +perfstat: kerneltop + ln -sf kerneltop perfstat + +clean: + rm $(BINS) diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c new file mode 100644 index 0000000..cf0e30b --- /dev/null +++ b/Documentation/perf_counter/kerneltop.c @@ -0,0 +1,956 @@ +/* + * kerneltop.c: show top kernel functions - performance counters showcase + + Build with: + + cc -O6 -Wall `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c + + Sample output: + +------------------------------------------------------------------------------ + KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) +------------------------------------------------------------------------------ + + weight RIP kernel function + ______ ________________ _______________ + + 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev + 33.00 - ffffffff804cb740 : sock_alloc_send_skb + 31.26 - ffffffff804ce808 : skb_push + 22.43 - ffffffff80510004 : tcp_established_options + 19.00 - ffffffff8027d250 : find_get_page + 15.76 - ffffffff804e4fc9 : eth_type_trans + 15.20 - ffffffff804d8baa : dst_release + 14.86 - ffffffff804cf5d8 : skb_release_head_state + 14.00 - ffffffff802217d5 : read_hpet + 12.00 - ffffffff804ffb7f : __ip_local_out + 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish + 8.54 - ffffffff805001a3 : ip_queue_xmit + + Started by Ingo Molnar + + Improvements and fixes by: + + Arjan van de Ven + Yanmin Zhang + Mike Galbraith + + Released under the GPL v2. (and only v2, not any later version) + + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#ifdef __x86_64__ +# define __NR_perf_counter_open 295 +#endif + +#ifdef __i386__ +# define __NR_perf_counter_open 333 +#endif + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +typedef unsigned int __u32; +typedef unsigned long long __u64; +typedef long long __s64; + +/* + * User-space ABI bits: + */ + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, + + PERF_HW_EVENTS_MAX = 7, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, + PERF_COUNT_CPU_MIGRATIONS = -5, + + PERF_SW_EVENTS_MIN = -6, +}; + +/* + * IRQ-notification data record type: + */ +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, +}; + +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + __s64 type; + + __u64 irq_period; + __u64 record_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + + __reserved_1 : 54; + + __u32 extra_config_len; + __u32 __reserved_4; + + __u64 __reserved_2; + __u64 __reserved_3; +}; + +/* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) + +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ + int ret; + + ret = syscall( + __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); +#if defined(__x86_64__) || defined(__i386__) + if (ret < 0 && ret > -4096) { + errno = -ret; + ret = -1; + } +#endif + return ret; +} + +const char *event_types [] = { + "CPU cycles", + "instructions", + "cache-refs", + "cache-misses", + "branches", + "branch-misses", + "bus cycles" +}; + +const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define MAX_COUNTERS 8 + +static int nr_counters = -1; + +static __u64 count_filter = 100; + +#define MAX_NR_CPUS 256 + +static int event_count[MAX_COUNTERS]; +static unsigned long event_id[MAX_COUNTERS]; +static int event_raw[MAX_COUNTERS]; + +static int tid = -1; +static int profile_cpu = -1; +static int nr_cpus = 0; +static int nmi = 1; +static int group = 0; + +static char *vmlinux; + +static char *sym_filter; +static unsigned long filter_start; +static unsigned long filter_end; + +static int delay_secs = 2; +static int zero; +static int dump_symtab; + +struct source_line { + uint64_t EIP; + unsigned long count; + char *line; +}; + +static GList *lines; + +static void display_help(void) +{ + printf( + "Usage: kerneltop []\n\n" + "KernelTop Options (up to %d event types can be specified at once):\n\n", + MAX_COUNTERS); + printf( + " -e EID --event_id=EID # event type ID [default: 0]\n" + " 0: CPU cycles\n" + " 1: instructions\n" + " 2: cache accesses\n" + " 3: cache misses\n" + " 4: branch instructions\n" + " 5: branch prediction misses\n" + " 6: bus cycles\n\n" + " rNNN: raw PMU events (eventsel+umask)\n\n" + " -c CNT --count=CNT # event period to sample\n\n" + " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" + " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" + " -d delay --delay= # sampling/display delay [default: 2]\n" + " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" + " -s symbol --symbol= # function to be showed annotated one-shot\n" + " -x path --vmlinux= # the vmlinux binary, required for -s use:\n" + " -z --zero # zero counts after display\n" + " -D --dump_symtab # dump symbol table to stderr on startup\n" + "\n"); + + exit(0); +} + +static void process_options(int argc, char *argv[]) +{ + int error = 0, counter; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"count", required_argument, NULL, 'c'}, + {"cpu", required_argument, NULL, 'C'}, + {"delay", required_argument, NULL, 'd'}, + {"dump_symtab", no_argument, NULL, 'D'}, + {"event_id", required_argument, NULL, 'e'}, + {"filter", required_argument, NULL, 'f'}, + {"group", required_argument, NULL, 'g'}, + {"help", no_argument, NULL, 'h'}, + {"nmi", required_argument, NULL, 'n'}, + {"pid", required_argument, NULL, 'p'}, + {"vmlinux", required_argument, NULL, 'x'}, + {"symbol", required_argument, NULL, 's'}, + {"zero", no_argument, NULL, 'z'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "c:C:d:De:f:g:hn:p:s:x:z", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'c': + if (nr_counters == -1) + nr_counters = 0; + event_count[nr_counters] = atoi(optarg); break; + case 'C': + /* CPU and PID are mutually exclusive */ + if (tid != -1) { + printf("WARNING: CPU switch overriding PID\n"); + sleep(1); + tid = -1; + } + profile_cpu = atoi(optarg); break; + case 'd': delay_secs = atoi(optarg); break; + case 'D': dump_symtab = 1; break; + + case 'e': + nr_counters++; + if (nr_counters == MAX_COUNTERS) { + error = 1; + break; + } + if (*optarg == 'r') { + event_raw[nr_counters] = 1; + ++optarg; + } + event_id[nr_counters] = strtol(optarg, NULL, 16); + break; + + case 'f': count_filter = atoi(optarg); break; + case 'g': group = atoi(optarg); break; + case 'h': display_help(); break; + case 'n': nmi = atoi(optarg); break; + case 'p': + /* CPU and PID are mutually exclusive */ + if (profile_cpu != -1) { + printf("WARNING: PID switch overriding CPU\n"); + sleep(1); + profile_cpu = -1; + } + tid = atoi(optarg); break; + case 's': sym_filter = strdup(optarg); break; + case 'x': vmlinux = strdup(optarg); break; + case 'z': zero = 1; break; + default: error = 1; break; + } + } + if (error) + display_help(); + + nr_counters++; + if (nr_counters < 1) + nr_counters = 1; + + for (counter = 0; counter < nr_counters; counter++) { + if (event_count[counter]) + continue; + + if (event_id[counter] < PERF_HW_EVENTS_MAX) + event_count[counter] = default_count[event_id[counter]]; + else + event_count[counter] = 100000; + } +} + +static uint64_t min_ip; +static uint64_t max_ip = -1ll; + +struct sym_entry { + unsigned long long addr; + char *sym; + unsigned long count[MAX_COUNTERS]; + int skip; + GList *source; +}; + +#define MAX_SYMS 100000 + +static int sym_table_count; + +struct sym_entry *sym_filter_entry; + +static struct sym_entry sym_table[MAX_SYMS]; + +static void show_details(struct sym_entry *sym); + +/* + * Ordering weight: count-1 * count-1 * ... / count-n + */ +static double sym_weight(const struct sym_entry *sym) +{ + double weight; + int counter; + + weight = sym->count[0]; + + for (counter = 1; counter < nr_counters-1; counter++) + weight *= sym->count[counter]; + + weight /= (sym->count[counter] + 1); + + return weight; +} + +static int compare(const void *__sym1, const void *__sym2) +{ + const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; + + return sym_weight(sym1) < sym_weight(sym2); +} + +static time_t last_refresh; +static long events; +static long userspace_events; +static const char CONSOLE_CLEAR[] = ""; + +static struct sym_entry tmp[MAX_SYMS]; + +static void print_sym_table(void) +{ + int i, printed; + int counter; + float events_per_sec = events/delay_secs; + float kevents_per_sec = (events-userspace_events)/delay_secs; + + memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); + qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); + + write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); + + printf( +"------------------------------------------------------------------------------\n"); + printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ", + events_per_sec, + 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), + nmi ? "NMI" : "IRQ"); + + if (nr_counters == 1) + printf("%d ", event_count[0]); + + for (counter = 0; counter < nr_counters; counter++) { + if (counter) + printf("/"); + + if (event_id[counter] < PERF_HW_EVENTS_MAX) + printf( "%s", event_types[event_id[counter]]); + else + printf( "raw:%04lx", event_id[counter]); + } + + printf( "], "); + + if (tid != -1) + printf(" (tid: %d", tid); + else + printf(" (all"); + + if (profile_cpu != -1) + printf(", cpu: %d)\n", profile_cpu); + else { + if (tid != -1) + printf(")\n"); + else + printf(", %d CPUs)\n", nr_cpus); + } + + printf("------------------------------------------------------------------------------\n\n"); + + if (nr_counters == 1) + printf(" events"); + else + printf(" weight events"); + + printf(" RIP kernel function\n" + " ______ ______ ________________ _______________\n\n" + ); + + printed = 0; + for (i = 0; i < sym_table_count; i++) { + int count; + + if (nr_counters == 1) { + if (printed <= 18 && + tmp[i].count[0] >= count_filter) { + printf("%19.2f - %016llx : %s\n", + sym_weight(tmp + i), tmp[i].addr, tmp[i].sym); + printed++; + } + } else { + if (printed <= 18 && + tmp[i].count[0] >= count_filter) { + printf("%8.1f %10ld - %016llx : %s\n", + sym_weight(tmp + i), + tmp[i].count[0], + tmp[i].addr, tmp[i].sym); + printed++; + } + } + /* + * Add decay to the counts: + */ + for (count = 0; count < nr_counters; count++) + sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; + } + + if (sym_filter_entry) + show_details(sym_filter_entry); + + last_refresh = time(NULL); + + { + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; + + if (poll(&stdin_poll, 1, 0) == 1) { + printf("key pressed - exiting.\n"); + exit(0); + } + } +} + +static int read_symbol(FILE *in, struct sym_entry *s) +{ + static int filter_match = 0; + char *sym, stype; + char str[500]; + int rc, pos; + + rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); + if (rc == EOF) + return -1; + + assert(rc == 3); + + /* skip until end of line: */ + pos = strlen(str); + do { + rc = fgetc(in); + if (rc == '\n' || rc == EOF || pos >= 499) + break; + str[pos] = rc; + pos++; + } while (1); + str[pos] = 0; + + sym = str; + + /* Filter out known duplicates and non-text symbols. */ + if (!strcmp(sym, "_text")) + return 1; + if (!min_ip && !strcmp(sym, "_stext")) + return 1; + if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) + return 1; + if (stype != 'T' && stype != 't') + return 1; + if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) + return 1; + if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) + return 1; + + s->sym = malloc(strlen(str)); + assert(s->sym); + + strcpy((char *)s->sym, str); + s->skip = 0; + + /* Tag events to be skipped. */ + if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) + s->skip = 1; + if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) + s->skip = 1; + + if (filter_match == 1) { + filter_end = s->addr; + filter_match = -1; + if (filter_end - filter_start > 10000) { + printf("hm, too large filter symbol <%s> - skipping.\n", + sym_filter); + printf("symbol filter start: %016lx\n", filter_start); + printf(" end: %016lx\n", filter_end); + filter_end = filter_start = 0; + sym_filter = NULL; + sleep(1); + } + } + if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { + filter_match = 1; + filter_start = s->addr; + } + + return 0; +} + +int compare_addr(const void *__sym1, const void *__sym2) +{ + const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; + + return sym1->addr > sym2->addr; +} + +static void sort_symbol_table(void) +{ + int i, dups; + + do { + qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); + for (i = 0, dups = 0; i < sym_table_count; i++) { + if (sym_table[i].addr == sym_table[i+1].addr) { + sym_table[i+1].addr = -1ll; + dups++; + } + } + sym_table_count -= dups; + } while(dups); +} + +static void parse_symbols(void) +{ + struct sym_entry *last; + + FILE *kallsyms = fopen("/proc/kallsyms", "r"); + + if (!kallsyms) { + printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); + exit(-1); + } + + while (!feof(kallsyms)) { + if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { + sym_table_count++; + assert(sym_table_count <= MAX_SYMS); + } + } + + sort_symbol_table(); + min_ip = sym_table[0].addr; + max_ip = sym_table[sym_table_count-1].addr; + last = sym_table + sym_table_count++; + + last->addr = -1ll; + last->sym = ""; + + if (filter_end) { + int count; + for (count=0; count < sym_table_count; count ++) { + if (!strcmp(sym_table[count].sym, sym_filter)) { + sym_filter_entry = &sym_table[count]; + break; + } + } + } + if (dump_symtab) { + int i; + + for (i = 0; i < sym_table_count; i++) + fprintf(stderr, "%llx %s\n", + sym_table[i].addr, sym_table[i].sym); + } +} + + +static void parse_vmlinux(char *filename) +{ + FILE *file; + char command[PATH_MAX*2]; + if (!filename) + return; + + sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); + + file = popen(command, "r"); + if (!file) + return; + + while (!feof(file)) { + struct source_line *src; + size_t dummy = 0; + char *c; + + src = malloc(sizeof(struct source_line)); + assert(src != NULL); + memset(src, 0, sizeof(struct source_line)); + + if (getline(&src->line, &dummy, file) < 0) + break; + if (!src->line) + break; + + c = strchr(src->line, '\n'); + if (c) + *c = 0; + + lines = g_list_prepend(lines, src); + + if (strlen(src->line)>8 && src->line[8] == ':') + src->EIP = strtoull(src->line, NULL, 16); + if (strlen(src->line)>8 && src->line[16] == ':') + src->EIP = strtoull(src->line, NULL, 16); + } + pclose(file); + lines = g_list_reverse(lines); +} + +static void record_precise_ip(uint64_t ip) +{ + struct source_line *line; + GList *item; + + item = g_list_first(lines); + while (item) { + line = item->data; + if (line->EIP == ip) + line->count++; + if (line->EIP > ip) + break; + item = g_list_next(item); + } +} + +static void lookup_sym_in_vmlinux(struct sym_entry *sym) +{ + struct source_line *line; + GList *item; + char pattern[PATH_MAX]; + sprintf(pattern, "<%s>:", sym->sym); + + item = g_list_first(lines); + while (item) { + line = item->data; + if (strstr(line->line, pattern)) { + sym->source = item; + break; + } + item = g_list_next(item); + } +} + +void show_lines(GList *item_queue, int item_queue_count) +{ + int i; + struct source_line *line; + + for (i = 0; i < item_queue_count; i++) { + line = item_queue->data; + printf("%8li\t%s\n", line->count, line->line); + item_queue = g_list_next(item_queue); + } +} + +#define TRACE_COUNT 3 + +static void show_details(struct sym_entry *sym) +{ + struct source_line *line; + GList *item; + int displayed = 0; + GList *item_queue = NULL; + int item_queue_count = 0; + + if (!sym->source) + lookup_sym_in_vmlinux(sym); + if (!sym->source) + return; + + printf("Showing details for %s\n", sym->sym); + + item = sym->source; + while (item) { + line = item->data; + if (displayed && strstr(line->line, ">:")) + break; + + if (!item_queue_count) + item_queue = item; + item_queue_count ++; + + if (line->count >= count_filter) { + show_lines(item_queue, item_queue_count); + item_queue_count = 0; + item_queue = NULL; + } else if (item_queue_count > TRACE_COUNT) { + item_queue = g_list_next(item_queue); + item_queue_count --; + } + + line->count = 0; + displayed++; + if (displayed > 300) + break; + item = g_list_next(item); + } +} + +/* + * Binary search in the histogram table and record the hit: + */ +static void record_ip(uint64_t ip, int counter) +{ + int left_idx, middle_idx, right_idx, idx; + unsigned long left, middle, right; + + record_precise_ip(ip); + + left_idx = 0; + right_idx = sym_table_count-1; + assert(ip <= max_ip && ip >= min_ip); + + while (left_idx + 1 < right_idx) { + middle_idx = (left_idx + right_idx) / 2; + + left = sym_table[ left_idx].addr; + middle = sym_table[middle_idx].addr; + right = sym_table[ right_idx].addr; + + if (!(left <= middle && middle <= right)) { + printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); + printf("%d %d %d\n", left_idx, middle_idx, right_idx); + } + assert(left <= middle && middle <= right); + if (!(left <= ip && ip <= right)) { + printf(" left: %016lx\n", left); + printf(" ip: %016lx\n", ip); + printf("right: %016lx\n", right); + } + assert(left <= ip && ip <= right); + /* + * [ left .... target .... middle .... right ] + * => right := middle + */ + if (ip < middle) { + right_idx = middle_idx; + continue; + } + /* + * [ left .... middle ... target ... right ] + * => left := middle + */ + left_idx = middle_idx; + } + + idx = left_idx; + + if (!sym_table[idx].skip) + sym_table[idx].count[counter]++; + else events--; +} + +static void process_event(uint64_t ip, int counter) +{ + events++; + + if (ip < min_ip || ip > max_ip) { + userspace_events++; + return; + } + + record_ip(ip, counter); +} + +int main(int argc, char *argv[]) +{ + struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS]; + struct perf_counter_hw_event hw_event; + int fd[MAX_NR_CPUS][MAX_COUNTERS]; + int i, counter, group_fd; + unsigned int cpu; + uint64_t ip; + ssize_t res; + int ret; + + process_options(argc, argv); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (tid != -1 || profile_cpu != -1) + nr_cpus = 1; + + assert(nr_cpus <= MAX_NR_CPUS); + + for (i = 0; i < nr_cpus; i++) { + group_fd = -1; + for (counter = 0; counter < nr_counters; counter++) { + + cpu = profile_cpu; + if (tid == -1 && profile_cpu == -1) + cpu = i; + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.type = event_id[counter]; + hw_event.raw = event_raw[counter]; + hw_event.irq_period = event_count[counter]; + hw_event.record_type = PERF_RECORD_IRQ; + hw_event.nmi = nmi; + + fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); + fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); + if (fd[i][counter] < 0) { + printf("kerneltop error: syscall returned with %d (%s)\n", + fd[i][counter], strerror(-fd[i][counter])); + if (fd[i][counter] == -1) + printf("Are you root?\n"); + exit(-1); + } + assert(fd[i][counter] >= 0); + + /* + * First counter acts as the group leader: + */ + if (group && group_fd == -1) + group_fd = fd[i][counter]; + + event_array[i][counter].fd = fd[i][counter]; + event_array[i][counter].events = POLLIN; + } + } + + parse_symbols(); + if (vmlinux && sym_filter_entry) + parse_vmlinux(vmlinux); + + printf("KernelTop refresh period: %d seconds\n", delay_secs); + last_refresh = time(NULL); + + while (1) { + int hits = events; + + for (i = 0; i < nr_cpus; i++) { + for (counter = 0; counter < nr_counters; counter++) { + res = read(fd[i][counter], (char *) &ip, sizeof(ip)); + if (res > 0) { + assert(res == sizeof(ip)); + + process_event(ip, counter); + } + } + } + + if (time(NULL) >= last_refresh + delay_secs) { + print_sym_table(); + events = userspace_events = 0; + } + + if (hits == events) + ret = poll(event_array[0], nr_cpus, 1000); + hits = events; + } + + return 0; +} diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c new file mode 100644 index 0000000..9a5808f --- /dev/null +++ b/Documentation/perf_counter/perfstat.c @@ -0,0 +1,521 @@ +/* + * perfstat: /usr/bin/time -alike performance counter statistics utility + * + * It summarizes the counter events of all tasks (and child tasks), + * covering all CPUs that the command (or workload) executes on. + * It only counts the per-task events of the workload started, + * independent of how many other tasks run on those CPUs. + * + * Build with: cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c + * + * Sample output: + * + + $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null + + Performance counter stats for 'ls': + + 163516953 instructions + 2295 cache-misses + 2855182 branch-misses + + * + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar + * + * Released under the GPLv2 (not later). + * + * Percpu counter support by: Yanmin Zhang + * Symbolic event options by: Wu Fengguang + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef __x86_64__ +# define __NR_perf_counter_open 295 +#endif + +#ifdef __i386__ +# define __NR_perf_counter_open 333 +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#endif + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +typedef unsigned int __u32; +typedef unsigned long long __u64; +typedef long long __s64; + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +/* + * User-space ABI bits: + */ + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, + + PERF_HW_EVENTS_MAX = 7, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, + PERF_COUNT_CPU_MIGRATIONS = -5, + + PERF_SW_EVENTS_MIN = -6, +}; + +/* + * IRQ-notification data record type: + */ +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, +}; + +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + __s64 type; + + __u64 irq_period; + __u64 record_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + + __reserved_1 : 54; + + __u32 extra_config_len; + __u32 __reserved_4; + + __u64 __reserved_2; + __u64 __reserved_3; +}; + +/* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) + +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ + int ret; + + ret = syscall( + __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); +#if defined(__x86_64__) || defined(__i386__) + if (ret < 0 && ret > -4096) { + errno = -ret; + ret = -1; + } +#endif + return ret; +} + + +static char *hw_event_names [] = { + "CPU cycles", + "instructions", + "cache references", + "cache misses", + "branches", + "branch misses", + "bus cycles", +}; + +static char *sw_event_names [] = { + "cpu clock ticks", + "task clock ticks", + "pagefaults", + "context switches", + "CPU migrations", +}; + +struct event_symbol { + int event; + char *symbol; +}; + +static struct event_symbol event_symbols [] = { + {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, + {PERF_COUNT_CPU_CYCLES, "cycles", }, + {PERF_COUNT_INSTRUCTIONS, "instructions", }, + {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, + {PERF_COUNT_CACHE_MISSES, "cache-misses", }, + {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, + {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, + {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, + {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, + {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, + {PERF_COUNT_CPU_CLOCK, "ticks", }, + {PERF_COUNT_TASK_CLOCK, "task-ticks", }, + {PERF_COUNT_PAGE_FAULTS, "page-faults", }, + {PERF_COUNT_PAGE_FAULTS, "faults", }, + {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, + {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, + {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, + {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, +}; + +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +static int nr_counters = 0; +static int nr_cpus = 0; + +static int event_id[MAX_COUNTERS] = + { -2, -5, -4, -3, 0, 1, 2, 3}; + +static int event_raw[MAX_COUNTERS]; + +static int system_wide = 0; + +static void display_help(void) +{ + unsigned int i; + int e; + + printf( + "Usage: perfstat [] \n\n" + "PerfStat Options (up to %d event types can be specified):\n\n", + MAX_COUNTERS); + printf( + " -e EVENT --event=EVENT # symbolic-name abbreviations"); + + for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) { + if (e != event_symbols[i].event) { + e = event_symbols[i].event; + printf( + "\n %2d: %-20s", e, event_symbols[i].symbol); + } else + printf(" %s", event_symbols[i].symbol); + } + + printf("\n" + " rNNN: raw event type\n\n" + " -s # system-wide collection\n\n" + " -c --command= # command+arguments to be timed.\n" + "\n"); + exit(0); +} + +static int type_valid(int type) +{ + if (type >= PERF_HW_EVENTS_MAX) + return 0; + if (type <= PERF_SW_EVENTS_MIN) + return 0; + + return 1; +} + +static char *event_name(int ctr) +{ + int type = event_id[ctr]; + static char buf[32]; + + if (event_raw[ctr]) { + sprintf(buf, "raw 0x%x", type); + return buf; + } + if (!type_valid(type)) + return "unknown"; + + if (type >= 0) + return hw_event_names[type]; + + return sw_event_names[-type-1]; +} + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static int match_event_symbols(char *str) +{ + unsigned int i; + + if (isdigit(str[0]) || str[0] == '-') + return atoi(str); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return PERF_HW_EVENTS_MAX; +} + +static void parse_events(char *str) +{ + int type, raw; + +again: + nr_counters++; + if (nr_counters == MAX_COUNTERS) + display_help(); + + raw = 0; + if (*str == 'r') { + raw = 1; + ++str; + type = strtol(str, NULL, 16); + } else { + type = match_event_symbols(str); + if (!type_valid(type)) + display_help(); + } + + event_id[nr_counters] = type; + event_raw[nr_counters] = raw; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } +} + +static void process_options(int argc, char *argv[]) +{ + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"event", required_argument, NULL, 'e'}, + {"help", no_argument, NULL, 'h'}, + {"command", no_argument, NULL, 'c'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:e:c:s", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'c': + break; + case 's': + system_wide = 1; + break; + case 'e': + parse_events(optarg); + break; + default: + break; + } + } + if (optind == argc) + goto err; + + if (!nr_counters) + nr_counters = 8; + else + nr_counters++; + return; + +err: + display_help(); +} + +char fault_here[1000000]; + +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static void create_counter(int counter) +{ + struct perf_counter_hw_event hw_event; + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.type = event_id[counter]; + hw_event.raw = event_raw[counter]; + hw_event.record_type = PERF_RECORD_SIMPLE; + hw_event.nmi = 0; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[cpu][counter], strerror(errno)); + exit(-1); + } + + } + } else { + hw_event.inherit = 1; + hw_event.disabled = 1; + + fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); + if (fd[0][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[0][counter], strerror(errno)); + exit(-1); + } + } +} + + +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + +int main(int argc, char *argv[]) +{ + unsigned long long t0, t1; + int counter; + ssize_t res; + int status; + int pid; + + process_options(argc, argv); + + if (system_wide) { + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + } else + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_counter(counter); + + argc -= optind; + argv += optind; + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + if (!pid) { + if (execvp(argv[0], argv)) { + perror(argv[0]); + exit(-1); + } + } + while (wait(&status) >= 0) + ; + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s\':\n", + argv[0]); + fprintf(stderr, "\n"); + + for (counter = 0; counter < nr_counters; counter++) { + int cpu; + __u64 count, single_count; + + count = 0; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + res = read(fd[cpu][counter], + (char *) &single_count, sizeof(single_count)); + assert(res == sizeof(single_count)); + count += single_count; + } + + if (!event_raw[counter] && + (event_id[counter] == PERF_COUNT_CPU_CLOCK || + event_id[counter] == PERF_COUNT_TASK_CLOCK)) { + + double msecs = (double)count / 1000000; + + fprintf(stderr, " %14.6f %-20s (msecs)\n", + msecs, event_name(counter)); + } else { + fprintf(stderr, " %14Ld %-20s (events)\n", + count, event_name(counter)); + } + if (!counter) + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", + (double)(t1-t0)/1e6); + fprintf(stderr, "\n"); + + return 0; +} -- cgit v0.10.2 From cea92ce5b07078cd62c4712d51390b09a43dba2e Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:02 +0800 Subject: perf_counter tools: Merge common code into perfcounters.h kerneltop's MAX_COUNTERS is increased from 8 to 64(the value used by perfstat). Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index cf0e30b..fe70a2c 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -65,126 +65,7 @@ #include -#ifdef __x86_64__ -# define __NR_perf_counter_open 295 -#endif - -#ifdef __i386__ -# define __NR_perf_counter_open 333 -#endif - -/* - * Pick up some kernel type conventions: - */ -#define __user -#define asmlinkage - -typedef unsigned int __u32; -typedef unsigned long long __u64; -typedef long long __s64; - -/* - * User-space ABI bits: - */ - -/* - * Generalized performance counter event types, used by the hw_event.type - * parameter of the sys_perf_counter_open() syscall: - */ -enum hw_event_types { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - - PERF_HW_EVENTS_MAX = 7, - - /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): - */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, - PERF_COUNT_CPU_MIGRATIONS = -5, - - PERF_SW_EVENTS_MIN = -6, -}; - -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_hw_event { - __s64 type; - - __u64 irq_period; - __u64 record_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - - __reserved_1 : 54; - - __u32 extra_config_len; - __u32 __reserved_4; - - __u64 __reserved_2; - __u64 __reserved_3; -}; - -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) - -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd, - unsigned long flags) -{ - int ret; - - ret = syscall( - __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); -#if defined(__x86_64__) || defined(__i386__) - if (ret < 0 && ret > -4096) { - errno = -ret; - ret = -1; - } -#endif - return ret; -} +#include "perfcounters.h" const char *event_types [] = { "CPU cycles", @@ -205,21 +86,10 @@ const unsigned int default_count[] = { 10000, }; -/* - * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all - * counters in the current task. - */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - -#define MAX_COUNTERS 8 - static int nr_counters = -1; static __u64 count_filter = 100; -#define MAX_NR_CPUS 256 - static int event_count[MAX_COUNTERS]; static unsigned long event_id[MAX_COUNTERS]; static int event_raw[MAX_COUNTERS]; diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h new file mode 100644 index 0000000..8c1559b --- /dev/null +++ b/Documentation/perf_counter/perfcounters.h @@ -0,0 +1,137 @@ +/* + * Ioctls that can be done on a perf counter fd: + */ +#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) + +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +typedef unsigned int __u32; +typedef unsigned long long __u64; +typedef long long __s64; + +/* + * User-space ABI bits: + */ + +/* + * Generalized performance counter event types, used by the hw_event.type + * parameter of the sys_perf_counter_open() syscall: + */ +enum hw_event_types { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, + + PERF_HW_EVENTS_MAX = 7, + + /* + * Special "software" counters provided by the kernel, even if + * the hardware does not support performance counters. These + * counters measure various physical and sw events of the + * kernel (and allow the profiling of them as well): + */ + PERF_COUNT_CPU_CLOCK = -1, + PERF_COUNT_TASK_CLOCK = -2, + PERF_COUNT_PAGE_FAULTS = -3, + PERF_COUNT_CONTEXT_SWITCHES = -4, + PERF_COUNT_CPU_MIGRATIONS = -5, + + PERF_SW_EVENTS_MIN = -6, +}; + +/* + * IRQ-notification data record type: + */ +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, +}; + +/* + * Hardware event to monitor via a performance monitoring counter: + */ +struct perf_counter_hw_event { + __s64 type; + + __u64 irq_period; + __u64 record_type; + __u64 read_format; + + __u64 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + raw : 1, /* raw event type */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + + __reserved_1 : 54; + + __u32 extra_config_len; + __u32 __reserved_4; + + __u64 __reserved_2; + __u64 __reserved_3; +}; + +#ifdef __x86_64__ +# define __NR_perf_counter_open 295 +#endif + +#ifdef __i386__ +# define __NR_perf_counter_open 333 +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#endif + +asmlinkage int sys_perf_counter_open( + + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ + int ret; + + ret = syscall( + __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); +#if defined(__x86_64__) || defined(__i386__) + if (ret < 0 && ret > -4096) { + errno = -ret; + ret = -1; + } +#endif + return ret; +} + diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c index 9a5808f..a3d4a7a 100644 --- a/Documentation/perf_counter/perfstat.c +++ b/Documentation/perf_counter/perfstat.c @@ -52,133 +52,7 @@ #include -#ifdef __x86_64__ -# define __NR_perf_counter_open 295 -#endif - -#ifdef __i386__ -# define __NR_perf_counter_open 333 -#endif - -#ifdef __powerpc__ -#define __NR_perf_counter_open 319 -#endif - -/* - * Pick up some kernel type conventions: - */ -#define __user -#define asmlinkage - -typedef unsigned int __u32; -typedef unsigned long long __u64; -typedef long long __s64; - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -/* - * User-space ABI bits: - */ - -/* - * Generalized performance counter event types, used by the hw_event.type - * parameter of the sys_perf_counter_open() syscall: - */ -enum hw_event_types { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - - PERF_HW_EVENTS_MAX = 7, - - /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): - */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, - PERF_COUNT_CPU_MIGRATIONS = -5, - - PERF_SW_EVENTS_MIN = -6, -}; - -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_hw_event { - __s64 type; - - __u64 irq_period; - __u64 record_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - - __reserved_1 : 54; - - __u32 extra_config_len; - __u32 __reserved_4; - - __u64 __reserved_2; - __u64 __reserved_3; -}; - -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) - -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd, - unsigned long flags) -{ - int ret; - - ret = syscall( - __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); -#if defined(__x86_64__) || defined(__i386__) - if (ret < 0 && ret > -4096) { - errno = -ret; - ret = -1; - } -#endif - return ret; -} - +#include "perfcounters.h" static char *hw_event_names [] = { "CPU cycles", @@ -224,9 +98,6 @@ static struct event_symbol event_symbols [] = { {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, }; -#define MAX_COUNTERS 64 -#define MAX_NR_CPUS 256 - static int nr_counters = 0; static int nr_cpus = 0; @@ -388,9 +259,6 @@ err: char fault_here[1000000]; -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static void create_counter(int counter) -- cgit v0.10.2 From f49012fad4ed2231c7380c0b1901122242b3eab0 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:03 +0800 Subject: perf_counter tools: Move perfstat supporting code into perfcounters.h Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h index 8c1559b..0f3764a 100644 --- a/Documentation/perf_counter/perfcounters.h +++ b/Documentation/perf_counter/perfcounters.h @@ -16,6 +16,14 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + /* * Pick up some kernel type conventions: */ @@ -135,3 +143,125 @@ asmlinkage int sys_perf_counter_open( return ret; } +static char *hw_event_names [] = { + "CPU cycles", + "instructions", + "cache references", + "cache misses", + "branches", + "branch misses", + "bus cycles", +}; + +static char *sw_event_names [] = { + "cpu clock ticks", + "task clock ticks", + "pagefaults", + "context switches", + "CPU migrations", +}; + +struct event_symbol { + int event; + char *symbol; +}; + +static struct event_symbol event_symbols [] = { + {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, + {PERF_COUNT_CPU_CYCLES, "cycles", }, + {PERF_COUNT_INSTRUCTIONS, "instructions", }, + {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, + {PERF_COUNT_CACHE_MISSES, "cache-misses", }, + {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, + {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, + {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, + {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, + {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, + {PERF_COUNT_CPU_CLOCK, "ticks", }, + {PERF_COUNT_TASK_CLOCK, "task-ticks", }, + {PERF_COUNT_PAGE_FAULTS, "page-faults", }, + {PERF_COUNT_PAGE_FAULTS, "faults", }, + {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, + {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, + {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, + {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, +}; + +static int type_valid(int type) +{ + if (type >= PERF_HW_EVENTS_MAX) + return 0; + if (type <= PERF_SW_EVENTS_MIN) + return 0; + + return 1; +} + +static char *event_name(int ctr) +{ + int type = event_id[ctr]; + static char buf[32]; + + if (event_raw[ctr]) { + sprintf(buf, "raw 0x%x", type); + return buf; + } + if (!type_valid(type)) + return "unknown"; + + if (type >= 0) + return hw_event_names[type]; + + return sw_event_names[-type-1]; +} + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static int match_event_symbols(char *str) +{ + unsigned int i; + + if (isdigit(str[0]) || str[0] == '-') + return atoi(str); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return PERF_HW_EVENTS_MAX; +} + +static void parse_events(char *str) +{ + int type, raw; + +again: + nr_counters++; + if (nr_counters == MAX_COUNTERS) + display_help(); + + raw = 0; + if (*str == 'r') { + raw = 1; + ++str; + type = strtol(str, NULL, 16); + } else { + type = match_event_symbols(str); + if (!type_valid(type)) + display_help(); + } + + event_id[nr_counters] = type; + event_raw[nr_counters] = raw; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } +} + diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c index a3d4a7a..3364dcb 100644 --- a/Documentation/perf_counter/perfstat.c +++ b/Documentation/perf_counter/perfstat.c @@ -54,50 +54,6 @@ #include "perfcounters.h" -static char *hw_event_names [] = { - "CPU cycles", - "instructions", - "cache references", - "cache misses", - "branches", - "branch misses", - "bus cycles", -}; - -static char *sw_event_names [] = { - "cpu clock ticks", - "task clock ticks", - "pagefaults", - "context switches", - "CPU migrations", -}; - -struct event_symbol { - int event; - char *symbol; -}; - -static struct event_symbol event_symbols [] = { - {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, - {PERF_COUNT_CPU_CYCLES, "cycles", }, - {PERF_COUNT_INSTRUCTIONS, "instructions", }, - {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, - {PERF_COUNT_CACHE_MISSES, "cache-misses", }, - {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, - {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, - {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, - {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, - {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, - {PERF_COUNT_CPU_CLOCK, "ticks", }, - {PERF_COUNT_TASK_CLOCK, "task-ticks", }, - {PERF_COUNT_PAGE_FAULTS, "page-faults", }, - {PERF_COUNT_PAGE_FAULTS, "faults", }, - {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, - {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, - {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, - {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, -}; - static int nr_counters = 0; static int nr_cpus = 0; @@ -137,84 +93,6 @@ static void display_help(void) exit(0); } -static int type_valid(int type) -{ - if (type >= PERF_HW_EVENTS_MAX) - return 0; - if (type <= PERF_SW_EVENTS_MIN) - return 0; - - return 1; -} - -static char *event_name(int ctr) -{ - int type = event_id[ctr]; - static char buf[32]; - - if (event_raw[ctr]) { - sprintf(buf, "raw 0x%x", type); - return buf; - } - if (!type_valid(type)) - return "unknown"; - - if (type >= 0) - return hw_event_names[type]; - - return sw_event_names[-type-1]; -} - -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static int match_event_symbols(char *str) -{ - unsigned int i; - - if (isdigit(str[0]) || str[0] == '-') - return atoi(str); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return event_symbols[i].event; - } - - return PERF_HW_EVENTS_MAX; -} - -static void parse_events(char *str) -{ - int type, raw; - -again: - nr_counters++; - if (nr_counters == MAX_COUNTERS) - display_help(); - - raw = 0; - if (*str == 'r') { - raw = 1; - ++str; - type = strtol(str, NULL, 16); - } else { - type = match_event_symbols(str); - if (!type_valid(type)) - display_help(); - } - - event_id[nr_counters] = type; - event_raw[nr_counters] = raw; - - str = strstr(str, ","); - if (str) { - str++; - goto again; - } -} - static void process_options(int argc, char *argv[]) { for (;;) { @@ -296,14 +174,6 @@ static void create_counter(int counter) } -#define rdclock() \ -({ \ - struct timespec ts; \ - \ - clock_gettime(CLOCK_MONOTONIC, &ts); \ - ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ -}) - int main(int argc, char *argv[]) { unsigned long long t0, t1; -- cgit v0.10.2 From 95bb3be1b3ca4a71cc168787b675d5b7852fc6be Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:04 +0800 Subject: perf_counter tools: support symbolic event names in kerneltop - kerneltop: --event_id => --event - kerneltop: can accept SW event types now - perfstat: it used to implicitly add event -2(task-clock), the new code no longer does this. Shall we? Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index fe70a2c..edc5b09 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -86,13 +86,9 @@ const unsigned int default_count[] = { 10000, }; -static int nr_counters = -1; - static __u64 count_filter = 100; static int event_count[MAX_COUNTERS]; -static unsigned long event_id[MAX_COUNTERS]; -static int event_raw[MAX_COUNTERS]; static int tid = -1; static int profile_cpu = -1; @@ -125,7 +121,7 @@ static void display_help(void) "KernelTop Options (up to %d event types can be specified at once):\n\n", MAX_COUNTERS); printf( - " -e EID --event_id=EID # event type ID [default: 0]\n" + " -e EID --event=EID # event type ID [default: 0]\n" " 0: CPU cycles\n" " 1: instructions\n" " 2: cache accesses\n" @@ -160,7 +156,7 @@ static void process_options(int argc, char *argv[]) {"cpu", required_argument, NULL, 'C'}, {"delay", required_argument, NULL, 'd'}, {"dump_symtab", no_argument, NULL, 'D'}, - {"event_id", required_argument, NULL, 'e'}, + {"event", required_argument, NULL, 'e'}, {"filter", required_argument, NULL, 'f'}, {"group", required_argument, NULL, 'g'}, {"help", no_argument, NULL, 'h'}, @@ -178,8 +174,6 @@ static void process_options(int argc, char *argv[]) switch (c) { case 'c': - if (nr_counters == -1) - nr_counters = 0; event_count[nr_counters] = atoi(optarg); break; case 'C': /* CPU and PID are mutually exclusive */ @@ -192,18 +186,7 @@ static void process_options(int argc, char *argv[]) case 'd': delay_secs = atoi(optarg); break; case 'D': dump_symtab = 1; break; - case 'e': - nr_counters++; - if (nr_counters == MAX_COUNTERS) { - error = 1; - break; - } - if (*optarg == 'r') { - event_raw[nr_counters] = 1; - ++optarg; - } - event_id[nr_counters] = strtol(optarg, NULL, 16); - break; + case 'e': error = parse_events(optarg); break; case 'f': count_filter = atoi(optarg); break; case 'g': group = atoi(optarg); break; @@ -226,9 +209,10 @@ static void process_options(int argc, char *argv[]) if (error) display_help(); - nr_counters++; - if (nr_counters < 1) + if (!nr_counters) { nr_counters = 1; + event_id[0] = 0; + } for (counter = 0; counter < nr_counters; counter++) { if (event_count[counter]) diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h index 0f3764a..99a90d8 100644 --- a/Documentation/perf_counter/perfcounters.h +++ b/Documentation/perf_counter/perfcounters.h @@ -143,6 +143,10 @@ asmlinkage int sys_perf_counter_open( return ret; } +static int nr_counters = 0; +static long event_id[MAX_COUNTERS] = { -2, -5, -4, -3, 0, 1, 2, 3}; +static int event_raw[MAX_COUNTERS]; + static char *hw_event_names [] = { "CPU cycles", "instructions", @@ -235,14 +239,13 @@ static int match_event_symbols(char *str) return PERF_HW_EVENTS_MAX; } -static void parse_events(char *str) +static int parse_events(char *str) { int type, raw; again: - nr_counters++; if (nr_counters == MAX_COUNTERS) - display_help(); + return -1; raw = 0; if (*str == 'r') { @@ -252,16 +255,18 @@ again: } else { type = match_event_symbols(str); if (!type_valid(type)) - display_help(); + return -1; } event_id[nr_counters] = type; event_raw[nr_counters] = raw; + nr_counters++; str = strstr(str, ","); if (str) { str++; goto again; } -} + return 0; +} diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c index 3364dcb..fd59446 100644 --- a/Documentation/perf_counter/perfstat.c +++ b/Documentation/perf_counter/perfstat.c @@ -54,14 +54,8 @@ #include "perfcounters.h" -static int nr_counters = 0; static int nr_cpus = 0; -static int event_id[MAX_COUNTERS] = - { -2, -5, -4, -3, 0, 1, 2, 3}; - -static int event_raw[MAX_COUNTERS]; - static int system_wide = 0; static void display_help(void) @@ -127,8 +121,6 @@ static void process_options(int argc, char *argv[]) if (!nr_counters) nr_counters = 8; - else - nr_counters++; return; err: -- cgit v0.10.2 From e3908612d93dff9d7746d85d37c42593254bf282 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:05 +0800 Subject: perf_counter tools: Reuse event_name() in kerneltop - can handle sw counters now - the outputs will look slightly different Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index edc5b09..cba5cb0 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -67,16 +67,6 @@ #include "perfcounters.h" -const char *event_types [] = { - "CPU cycles", - "instructions", - "cache-refs", - "cache-misses", - "branches", - "branch-misses", - "bus cycles" -}; - const unsigned int default_count[] = { 1000000, 1000000, @@ -304,10 +294,7 @@ static void print_sym_table(void) if (counter) printf("/"); - if (event_id[counter] < PERF_HW_EVENTS_MAX) - printf( "%s", event_types[event_id[counter]]); - else - printf( "raw:%04lx", event_id[counter]); + printf("%s", event_name(counter)); } printf( "], "); -- cgit v0.10.2 From f7524bda8be8be98db356d6a83ac1da451ecdb2e Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:06 +0800 Subject: perf_counter tools: move remaining code into kerneltop.c - perfstat.c can be safely removed now - perfstat: -s => -a for system wide accounting - kerneltop: add -S/--stat for perfstat mode - minor adjustments to kerneltop --help, perfstat --help Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index cba5cb0..9db65a4 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -3,7 +3,7 @@ Build with: - cc -O6 -Wall `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c + cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c Sample output: @@ -26,18 +26,40 @@ 12.00 - ffffffff804ffb7f : __ip_local_out 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish 8.54 - ffffffff805001a3 : ip_queue_xmit + */ - Started by Ingo Molnar +/* + * perfstat: /usr/bin/time -alike performance counter statistics utility - Improvements and fixes by: + It summarizes the counter events of all tasks (and child tasks), + covering all CPUs that the command (or workload) executes on. + It only counts the per-task events of the workload started, + independent of how many other tasks run on those CPUs. - Arjan van de Ven - Yanmin Zhang - Mike Galbraith + Sample output: - Released under the GPL v2. (and only v2, not any later version) + $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null + Performance counter stats for 'ls': + + 163516953 instructions + 2295 cache-misses + 2855182 branch-misses */ + + /* + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar + * + * Improvements and fixes by: + * + * Arjan van de Ven + * Yanmin Zhang + * Wu Fengguang + * Mike Galbraith + * + * Released under the GPL v2. (and only v2, not any later version) + */ + #define _GNU_SOURCE #include #include @@ -67,18 +89,22 @@ #include "perfcounters.h" -const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; -static __u64 count_filter = 100; +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +#define DEF_PERFSTAT_EVENTS { -2, -5, -4, -3, 0, 1, 2, 3} + +static int run_perfstat = 0; +static int system_wide = 0; +static int nr_counters = 0; +static long event_id[MAX_COUNTERS] = DEF_PERFSTAT_EVENTS; +static int event_raw[MAX_COUNTERS]; static int event_count[MAX_COUNTERS]; +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static __u64 count_filter = 100; static int tid = -1; static int profile_cpu = -1; @@ -96,125 +122,335 @@ static int delay_secs = 2; static int zero; static int dump_symtab; +static GList *lines; + struct source_line { uint64_t EIP; unsigned long count; char *line; }; -static GList *lines; + +const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +static char *hw_event_names[] = { + "CPU cycles", + "instructions", + "cache references", + "cache misses", + "branches", + "branch misses", + "bus cycles", +}; + +static char *sw_event_names[] = { + "cpu clock ticks", + "task clock ticks", + "pagefaults", + "context switches", + "CPU migrations", +}; + +struct event_symbol { + int event; + char *symbol; +}; + +static struct event_symbol event_symbols[] = { + {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, + {PERF_COUNT_CPU_CYCLES, "cycles", }, + {PERF_COUNT_INSTRUCTIONS, "instructions", }, + {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, + {PERF_COUNT_CACHE_MISSES, "cache-misses", }, + {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, + {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, + {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, + {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, + {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, + {PERF_COUNT_CPU_CLOCK, "ticks", }, + {PERF_COUNT_TASK_CLOCK, "task-ticks", }, + {PERF_COUNT_PAGE_FAULTS, "page-faults", }, + {PERF_COUNT_PAGE_FAULTS, "faults", }, + {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, + {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, + {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, + {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, +}; + +static void display_events_help(void) +{ + unsigned int i; + int e; + + printf( + " -e EVENT --event=EVENT # symbolic-name abbreviations"); + + for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) { + if (e != event_symbols[i].event) { + e = event_symbols[i].event; + printf( + "\n %2d: %-20s", e, event_symbols[i].symbol); + } else + printf(" %s", event_symbols[i].symbol); + } + + printf("\n" + " rNNN: raw PMU events (eventsel+umask)\n\n"); +} + +static void display_perfstat_help(void) +{ + printf( + "Usage: perfstat [] \n\n" + "PerfStat Options (up to %d event types can be specified):\n\n", + MAX_COUNTERS); + + display_events_help(); + + printf( + " -a # system-wide collection\n"); + exit(0); +} static void display_help(void) { + if (run_perfstat) + return display_perfstat_help(); + printf( - "Usage: kerneltop []\n\n" + "Usage: kerneltop []\n" + " Or: kerneltop -S [] COMMAND [ARGS]\n\n" "KernelTop Options (up to %d event types can be specified at once):\n\n", MAX_COUNTERS); + + display_events_help(); + printf( - " -e EID --event=EID # event type ID [default: 0]\n" - " 0: CPU cycles\n" - " 1: instructions\n" - " 2: cache accesses\n" - " 3: cache misses\n" - " 4: branch instructions\n" - " 5: branch prediction misses\n" - " 6: bus cycles\n\n" - " rNNN: raw PMU events (eventsel+umask)\n\n" + " -S --stat # perfstat COMMAND\n" + " -a # system-wide collection (for perfstat)\n\n" " -c CNT --count=CNT # event period to sample\n\n" " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" " -d delay --delay= # sampling/display delay [default: 2]\n" - " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" + " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" " -s symbol --symbol= # function to be showed annotated one-shot\n" - " -x path --vmlinux= # the vmlinux binary, required for -s use:\n" + " -x path --vmlinux= # the vmlinux binary, required for -s use\n" " -z --zero # zero counts after display\n" " -D --dump_symtab # dump symbol table to stderr on startup\n" - "\n"); + ); exit(0); } -static void process_options(int argc, char *argv[]) +static int type_valid(int type) { - int error = 0, counter; + if (type >= PERF_HW_EVENTS_MAX) + return 0; + if (type <= PERF_SW_EVENTS_MIN) + return 0; - for (;;) { - int option_index = 0; - /** Options for getopt */ - static struct option long_options[] = { - {"count", required_argument, NULL, 'c'}, - {"cpu", required_argument, NULL, 'C'}, - {"delay", required_argument, NULL, 'd'}, - {"dump_symtab", no_argument, NULL, 'D'}, - {"event", required_argument, NULL, 'e'}, - {"filter", required_argument, NULL, 'f'}, - {"group", required_argument, NULL, 'g'}, - {"help", no_argument, NULL, 'h'}, - {"nmi", required_argument, NULL, 'n'}, - {"pid", required_argument, NULL, 'p'}, - {"vmlinux", required_argument, NULL, 'x'}, - {"symbol", required_argument, NULL, 's'}, - {"zero", no_argument, NULL, 'z'}, - {NULL, 0, NULL, 0 } - }; - int c = getopt_long(argc, argv, "c:C:d:De:f:g:hn:p:s:x:z", - long_options, &option_index); - if (c == -1) - break; + return 1; +} - switch (c) { - case 'c': - event_count[nr_counters] = atoi(optarg); break; - case 'C': - /* CPU and PID are mutually exclusive */ - if (tid != -1) { - printf("WARNING: CPU switch overriding PID\n"); - sleep(1); - tid = -1; - } - profile_cpu = atoi(optarg); break; - case 'd': delay_secs = atoi(optarg); break; - case 'D': dump_symtab = 1; break; +static char *event_name(int ctr) +{ + int type = event_id[ctr]; + static char buf[32]; - case 'e': error = parse_events(optarg); break; + if (event_raw[ctr]) { + sprintf(buf, "raw 0x%x", type); + return buf; + } + if (!type_valid(type)) + return "unknown"; - case 'f': count_filter = atoi(optarg); break; - case 'g': group = atoi(optarg); break; - case 'h': display_help(); break; - case 'n': nmi = atoi(optarg); break; - case 'p': - /* CPU and PID are mutually exclusive */ - if (profile_cpu != -1) { - printf("WARNING: PID switch overriding CPU\n"); - sleep(1); - profile_cpu = -1; + if (type >= 0) + return hw_event_names[type]; + + return sw_event_names[-type-1]; +} + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static int match_event_symbols(char *str) +{ + unsigned int i; + + if (isdigit(str[0]) || str[0] == '-') + return atoi(str); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return PERF_HW_EVENTS_MAX; +} + +static int parse_events(char *str) +{ + int type, raw; + +again: + if (nr_counters == MAX_COUNTERS) + return -1; + + raw = 0; + if (*str == 'r') { + raw = 1; + ++str; + type = strtol(str, NULL, 16); + } else { + type = match_event_symbols(str); + if (!type_valid(type)) + return -1; + } + + event_id[nr_counters] = type; + event_raw[nr_counters] = raw; + nr_counters++; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } + + return 0; +} + + +/* + * perfstat + */ + +char fault_here[1000000]; + +static void create_perfstat_counter(int counter) +{ + struct perf_counter_hw_event hw_event; + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.type = event_id[counter]; + hw_event.raw = event_raw[counter]; + hw_event.record_type = PERF_RECORD_SIMPLE; + hw_event.nmi = 0; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[cpu][counter], strerror(errno)); + exit(-1); } - tid = atoi(optarg); break; - case 's': sym_filter = strdup(optarg); break; - case 'x': vmlinux = strdup(optarg); break; - case 'z': zero = 1; break; - default: error = 1; break; + } + } else { + hw_event.inherit = 1; + hw_event.disabled = 1; + + fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); + if (fd[0][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[0][counter], strerror(errno)); + exit(-1); } } - if (error) - display_help(); +} - if (!nr_counters) { - nr_counters = 1; - event_id[0] = 0; +int do_perfstat(int argc, char *argv[]) +{ + unsigned long long t0, t1; + int counter; + ssize_t res; + int status; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perfstat_counter(counter); + + argc -= optind; + argv += optind; + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + if (!pid) { + if (execvp(argv[0], argv)) { + perror(argv[0]); + exit(-1); + } } + while (wait(&status) >= 0) + ; + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s\':\n", + argv[0]); + fprintf(stderr, "\n"); for (counter = 0; counter < nr_counters; counter++) { - if (event_count[counter]) - continue; + int cpu; + __u64 count, single_count; + + count = 0; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + res = read(fd[cpu][counter], + (char *) &single_count, sizeof(single_count)); + assert(res == sizeof(single_count)); + count += single_count; + } - if (event_id[counter] < PERF_HW_EVENTS_MAX) - event_count[counter] = default_count[event_id[counter]]; - else - event_count[counter] = 100000; + if (!event_raw[counter] && + (event_id[counter] == PERF_COUNT_CPU_CLOCK || + event_id[counter] == PERF_COUNT_TASK_CLOCK)) { + + double msecs = (double)count / 1000000; + + fprintf(stderr, " %14.6f %-20s (msecs)\n", + msecs, event_name(counter)); + } else { + fprintf(stderr, " %14Ld %-20s (events)\n", + count, event_name(counter)); + } + if (!counter) + fprintf(stderr, "\n"); } + fprintf(stderr, "\n"); + fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", + (double)(t1-t0)/1e6); + fprintf(stderr, "\n"); + + return 0; } +/* + * Symbols + */ + static uint64_t min_ip; static uint64_t max_ip = -1ll; @@ -507,6 +743,9 @@ static void parse_symbols(void) } } +/* + * Source lines + */ static void parse_vmlinux(char *filename) { @@ -527,7 +766,7 @@ static void parse_vmlinux(char *filename) char *c; src = malloc(sizeof(struct source_line)); - assert(src != NULL); + assert(src != NULL); memset(src, 0, sizeof(struct source_line)); if (getline(&src->line, &dummy, file) < 0) @@ -706,11 +945,100 @@ static void process_event(uint64_t ip, int counter) record_ip(ip, counter); } +static void process_options(int argc, char *argv[]) +{ + int error = 0, counter; + + if (strstr(argv[0], "perfstat")) + run_perfstat = 1; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"count", required_argument, NULL, 'c'}, + {"cpu", required_argument, NULL, 'C'}, + {"delay", required_argument, NULL, 'd'}, + {"dump_symtab", no_argument, NULL, 'D'}, + {"event", required_argument, NULL, 'e'}, + {"filter", required_argument, NULL, 'f'}, + {"group", required_argument, NULL, 'g'}, + {"help", no_argument, NULL, 'h'}, + {"nmi", required_argument, NULL, 'n'}, + {"pid", required_argument, NULL, 'p'}, + {"vmlinux", required_argument, NULL, 'x'}, + {"symbol", required_argument, NULL, 's'}, + {"stat", no_argument, NULL, 'S'}, + {"zero", no_argument, NULL, 'z'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'a': system_wide = 1; break; + case 'c': event_count[nr_counters] = atoi(optarg); break; + case 'C': + /* CPU and PID are mutually exclusive */ + if (tid != -1) { + printf("WARNING: CPU switch overriding PID\n"); + sleep(1); + tid = -1; + } + profile_cpu = atoi(optarg); break; + case 'd': delay_secs = atoi(optarg); break; + case 'D': dump_symtab = 1; break; + + case 'e': error = parse_events(optarg); break; + + case 'f': count_filter = atoi(optarg); break; + case 'g': group = atoi(optarg); break; + case 'h': display_help(); break; + case 'n': nmi = atoi(optarg); break; + case 'p': + /* CPU and PID are mutually exclusive */ + if (profile_cpu != -1) { + printf("WARNING: PID switch overriding CPU\n"); + sleep(1); + profile_cpu = -1; + } + tid = atoi(optarg); break; + case 's': sym_filter = strdup(optarg); break; + case 'S': run_perfstat = 1; break; + case 'x': vmlinux = strdup(optarg); break; + case 'z': zero = 1; break; + default: error = 1; break; + } + } + if (error) + display_help(); + + if (!nr_counters) { + if (run_perfstat) + nr_counters = 8; + else { + nr_counters = 1; + event_id[0] = 0; + } + } + + for (counter = 0; counter < nr_counters; counter++) { + if (event_count[counter]) + continue; + + if (event_id[counter] < PERF_HW_EVENTS_MAX) + event_count[counter] = default_count[event_id[counter]]; + else + event_count[counter] = 100000; + } +} + int main(int argc, char *argv[]) { struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS]; struct perf_counter_hw_event hw_event; - int fd[MAX_NR_CPUS][MAX_COUNTERS]; int i, counter, group_fd; unsigned int cpu; uint64_t ip; @@ -720,11 +1048,15 @@ int main(int argc, char *argv[]) process_options(argc, argv); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + if (run_perfstat) + return do_perfstat(argc, argv); + if (tid != -1 || profile_cpu != -1) nr_cpus = 1; - assert(nr_cpus <= MAX_NR_CPUS); - for (i = 0; i < nr_cpus; i++) { group_fd = -1; for (counter = 0; counter < nr_counters; counter++) { diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h index 99a90d8..32e24b9 100644 --- a/Documentation/perf_counter/perfcounters.h +++ b/Documentation/perf_counter/perfcounters.h @@ -11,9 +11,6 @@ #define PR_TASK_PERF_COUNTERS_DISABLE 31 #define PR_TASK_PERF_COUNTERS_ENABLE 32 -#define MAX_COUNTERS 64 -#define MAX_NR_CPUS 256 - #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define rdclock() \ @@ -110,6 +107,7 @@ struct perf_counter_hw_event { __u64 __reserved_3; }; + #ifdef __x86_64__ # define __NR_perf_counter_open 295 #endif @@ -142,131 +140,3 @@ asmlinkage int sys_perf_counter_open( #endif return ret; } - -static int nr_counters = 0; -static long event_id[MAX_COUNTERS] = { -2, -5, -4, -3, 0, 1, 2, 3}; -static int event_raw[MAX_COUNTERS]; - -static char *hw_event_names [] = { - "CPU cycles", - "instructions", - "cache references", - "cache misses", - "branches", - "branch misses", - "bus cycles", -}; - -static char *sw_event_names [] = { - "cpu clock ticks", - "task clock ticks", - "pagefaults", - "context switches", - "CPU migrations", -}; - -struct event_symbol { - int event; - char *symbol; -}; - -static struct event_symbol event_symbols [] = { - {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, - {PERF_COUNT_CPU_CYCLES, "cycles", }, - {PERF_COUNT_INSTRUCTIONS, "instructions", }, - {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, - {PERF_COUNT_CACHE_MISSES, "cache-misses", }, - {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, - {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, - {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, - {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, - {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, - {PERF_COUNT_CPU_CLOCK, "ticks", }, - {PERF_COUNT_TASK_CLOCK, "task-ticks", }, - {PERF_COUNT_PAGE_FAULTS, "page-faults", }, - {PERF_COUNT_PAGE_FAULTS, "faults", }, - {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, - {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, - {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, - {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, -}; - -static int type_valid(int type) -{ - if (type >= PERF_HW_EVENTS_MAX) - return 0; - if (type <= PERF_SW_EVENTS_MIN) - return 0; - - return 1; -} - -static char *event_name(int ctr) -{ - int type = event_id[ctr]; - static char buf[32]; - - if (event_raw[ctr]) { - sprintf(buf, "raw 0x%x", type); - return buf; - } - if (!type_valid(type)) - return "unknown"; - - if (type >= 0) - return hw_event_names[type]; - - return sw_event_names[-type-1]; -} - -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static int match_event_symbols(char *str) -{ - unsigned int i; - - if (isdigit(str[0]) || str[0] == '-') - return atoi(str); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return event_symbols[i].event; - } - - return PERF_HW_EVENTS_MAX; -} - -static int parse_events(char *str) -{ - int type, raw; - -again: - if (nr_counters == MAX_COUNTERS) - return -1; - - raw = 0; - if (*str == 'r') { - raw = 1; - ++str; - type = strtol(str, NULL, 16); - } else { - type = match_event_symbols(str); - if (!type_valid(type)) - return -1; - } - - event_id[nr_counters] = type; - event_raw[nr_counters] = raw; - nr_counters++; - - str = strstr(str, ","); - if (str) { - str++; - goto again; - } - - return 0; -} -- cgit v0.10.2 From ef45fa9e6c1694d3e8063f39749097a6e496b12c Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:07 +0800 Subject: perf_counter tools: fix comment for sym_weight() Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 9db65a4..7bf2a51 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -473,7 +473,7 @@ static struct sym_entry sym_table[MAX_SYMS]; static void show_details(struct sym_entry *sym); /* - * Ordering weight: count-1 * count-1 * ... / count-n + * Ordering weight: count-1 * count-2 * ... / count-n */ static double sym_weight(const struct sym_entry *sym) { -- cgit v0.10.2 From 3ab8d792b1348eaabfe550ba60902d781d160dd4 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:08 +0800 Subject: perf_counter tools: fix event_id type Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 7bf2a51..7bfb0f0 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -99,7 +99,7 @@ static int run_perfstat = 0; static int system_wide = 0; static int nr_counters = 0; -static long event_id[MAX_COUNTERS] = DEF_PERFSTAT_EVENTS; +static __s64 event_id[MAX_COUNTERS] = DEF_PERFSTAT_EVENTS; static int event_raw[MAX_COUNTERS]; static int event_count[MAX_COUNTERS]; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; @@ -261,11 +261,11 @@ static int type_valid(int type) static char *event_name(int ctr) { - int type = event_id[ctr]; + __s64 type = event_id[ctr]; static char buf[32]; if (event_raw[ctr]) { - sprintf(buf, "raw 0x%x", type); + sprintf(buf, "raw 0x%llx", (long long)type); return buf; } if (!type_valid(type)) @@ -299,7 +299,8 @@ static int match_event_symbols(char *str) static int parse_events(char *str) { - int type, raw; + __s64 type; + int raw; again: if (nr_counters == MAX_COUNTERS) -- cgit v0.10.2 From dda7c02f33833bfa9412ba6f0e410b0a18b42c88 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:09 +0800 Subject: perf_counter tools: cut down default count for cpu-cycles In my system, it takes kerneltop dozens of minutes to show up usable numbers. Make the default count 100 times smaller fixed this long startup latency. I'm not sure if it's the right solution though. Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 7bfb0f0..0bd3c13 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -132,7 +132,7 @@ struct source_line { const unsigned int default_count[] = { - 1000000, + 10000, 1000000, 10000, 10000, -- cgit v0.10.2 From af9522cf133e9be6da8525a46a9ed7e7659f0e1a Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Fri, 20 Mar 2009 10:08:10 +0800 Subject: perf_counter tools: when no command is feed to perfstat, display help and exit Signed-off-by: Wu Fengguang Acked-by: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 0bd3c13..81a68aa 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -387,6 +387,9 @@ int do_perfstat(int argc, char *argv[]) argc -= optind; argv += optind; + if (!argc) + display_help(); + /* * Enable counters and exec the command: */ -- cgit v0.10.2 From f4a2deb4860497f4332cf6a1acddab3dd628ddf0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:06 +0100 Subject: perf_counter: remove the event config bitfields Since the bitfields turned into a bit of a mess, remove them and rely on good old masks. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.059499915@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 6413d9c..d056515 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -602,13 +602,13 @@ hw_perf_counter_init(struct perf_counter *counter) return NULL; if ((s64)counter->hw_event.irq_period < 0) return NULL; - if (!counter->hw_event.raw_type) { - ev = counter->hw_event.event_id; + if (!perf_event_raw(&counter->hw_event)) { + ev = perf_event_id(&counter->hw_event); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) return NULL; ev = ppmu->generic_events[ev]; } else { - ev = counter->hw_event.raw_event_id; + ev = perf_event_config(&counter->hw_event); } counter->hw.config_base = ev; counter->hw.idx = 0; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 902282d..3f95b0c 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -217,15 +217,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) /* * Raw event type provide the config in the event structure */ - if (hw_event->raw_type) { - hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id); + if (perf_event_raw(hw_event)) { + hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event)); } else { - if (hw_event->event_id >= pmc_ops->max_events) + if (perf_event_id(hw_event) >= pmc_ops->max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= pmc_ops->event_map(hw_event->event_id); + hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); } counter->wakeup_pending = 0; diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 98f5990..56099e5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -82,32 +82,37 @@ enum perf_counter_record_type { PERF_RECORD_GROUP = 2, }; +#define __PERF_COUNTER_MASK(name) \ + (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ + PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW_BITS 1 +#define PERF_COUNTER_RAW_SHIFT 63 +#define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) + +#define PERF_COUNTER_CONFIG_BITS 63 +#define PERF_COUNTER_CONFIG_SHIFT 0 +#define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) + +#define PERF_COUNTER_TYPE_BITS 7 +#define PERF_COUNTER_TYPE_SHIFT 56 +#define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) + +#define PERF_COUNTER_EVENT_BITS 56 +#define PERF_COUNTER_EVENT_SHIFT 0 +#define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) + /* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - union { -#ifndef __BIG_ENDIAN_BITFIELD - struct { - __u64 event_id : 56, - type : 8; - }; - struct { - __u64 raw_event_id : 63, - raw_type : 1; - }; -#else - struct { - __u64 type : 8, - event_id : 56; - }; - struct { - __u64 raw_type : 1, - raw_event_id : 63; - }; -#endif /* __BIT_ENDIAN_BITFIELD */ - __u64 event_config; - }; + /* + * The MSB of the config word signifies if the rest contains cpu + * specific (raw) counter configuration data, if unset, the next + * 7 bits are an event type and the rest of the bits are the event + * identifier. + */ + __u64 config; __u64 irq_period; __u64 record_type; @@ -157,6 +162,27 @@ struct perf_counter_hw_event { struct task_struct; +static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_RAW_MASK; +} + +static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_CONFIG_MASK; +} + +static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) +{ + return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> + PERF_COUNTER_TYPE_SHIFT; +} + +static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) +{ + return hw_event->config & PERF_COUNTER_EVENT_MASK; +} + /** * struct hw_perf_counter - performance counter hardware details: */ @@ -336,8 +362,8 @@ extern void perf_counter_output(struct perf_counter *counter, */ static inline int is_software_counter(struct perf_counter *counter) { - return !counter->hw_event.raw_type && - counter->hw_event.type != PERF_TYPE_HARDWARE; + return !perf_event_raw(&counter->hw_event) && + perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; } extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f054b8c..ca14fc4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1379,7 +1379,7 @@ static void perf_counter_handle_group(struct perf_counter *counter) list_for_each_entry(sub, &leader->sibling_list, list_entry) { if (sub != counter) sub->hw_ops->read(sub); - perf_counter_store_irq(counter, sub->hw_event.event_config); + perf_counter_store_irq(counter, sub->hw_event.config); perf_counter_store_irq(counter, atomic64_read(&sub->count)); } } @@ -1489,13 +1489,13 @@ static int perf_swcounter_match(struct perf_counter *counter, if (counter->state != PERF_COUNTER_STATE_ACTIVE) return 0; - if (counter->hw_event.raw_type) + if (perf_event_raw(&counter->hw_event)) return 0; - if (counter->hw_event.type != type) + if (perf_event_type(&counter->hw_event) != type) return 0; - if (counter->hw_event.event_id != event) + if (perf_event_id(&counter->hw_event) != event) return 0; if (counter->hw_event.exclude_user && user_mode(regs)) @@ -1757,13 +1757,13 @@ extern void ftrace_profile_disable(int); static void tp_perf_counter_destroy(struct perf_counter *counter) { - ftrace_profile_disable(counter->hw_event.event_id); + ftrace_profile_disable(perf_event_id(&counter->hw_event)); } static const struct hw_perf_counter_ops * tp_perf_counter_init(struct perf_counter *counter) { - int event_id = counter->hw_event.event_id; + int event_id = perf_event_id(&counter->hw_event); int ret; ret = ftrace_profile_enable(event_id); @@ -1797,7 +1797,7 @@ sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (counter->hw_event.event_id) { + switch (perf_event_id(&counter->hw_event)) { case PERF_COUNT_CPU_CLOCK: hw_ops = &perf_ops_cpu_clock; @@ -1882,9 +1882,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, hw_ops = NULL; - if (hw_event->raw_type) + if (perf_event_raw(hw_event)) { hw_ops = hw_perf_counter_init(counter); - else switch (hw_event->type) { + goto done; + } + + switch (perf_event_type(hw_event)) { case PERF_TYPE_HARDWARE: hw_ops = hw_perf_counter_init(counter); break; @@ -1902,6 +1905,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, kfree(counter); return NULL; } +done: counter->hw_ops = hw_ops; return counter; -- cgit v0.10.2 From 96f6d4444302bb2ea2cf409529eef816462f6ce0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:07 +0100 Subject: perf_counter: avoid recursion Tracepoint events like lock_acquire and software counters like pagefaults can recurse into the perf counter code again, avoid that. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.152096433@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 56099e5..18dc17d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -328,6 +328,13 @@ struct perf_cpu_context { int active_oncpu; int max_pertask; int exclusive; + + /* + * Recursion avoidance: + * + * task, softirq, irq, nmi context + */ + int recursion[4]; }; /* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ca14fc4..ce34bff 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -1532,10 +1533,31 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, rcu_read_unlock(); } +static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) +{ + if (in_nmi()) + return &cpuctx->recursion[3]; + + if (in_irq()) + return &cpuctx->recursion[2]; + + if (in_softirq()) + return &cpuctx->recursion[1]; + + return &cpuctx->recursion[0]; +} + static void __perf_swcounter_event(enum perf_event_types type, u32 event, u64 nr, int nmi, struct pt_regs *regs) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int *recursion = perf_swcounter_recursion_context(cpuctx); + + if (*recursion) + goto out; + + (*recursion)++; + barrier(); perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); if (cpuctx->task_ctx) { @@ -1543,6 +1565,10 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event, nr, nmi, regs); } + barrier(); + (*recursion)--; + +out: put_cpu_var(perf_cpu_context); } -- cgit v0.10.2 From 37d81828385f8ff823caaaf1a83e72d065b6cfa1 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Mar 2009 18:22:08 +0100 Subject: perf_counter: add an mmap method to allow userspace to read hardware counters Impact: new feature giving performance improvement This adds the ability for userspace to do an mmap on a hardware counter fd and get access to a read-only page that contains the information needed to translate a hardware counter value to the full 64-bit counter value that would be returned by a read on the fd. This is useful on architectures that allow user programs to read the hardware counters, such as PowerPC. The mmap will only succeed if the counter is a hardware counter monitoring the current process. On my quad 2.5GHz PowerPC 970MP machine, userspace can read a counter and translate it to the full 64-bit value in about 30ns using the mmapped page, compared to about 830ns for the read syscall on the counter, so this does give a significant performance improvement. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090323172417.297057964@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d056515..e434928 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -417,6 +417,8 @@ void hw_perf_restore(u64 disable) atomic64_set(&counter->hw.prev_count, val); counter->hw.idx = hwc_index[i] + 1; write_pmc(counter->hw.idx, val); + if (counter->user_page) + perf_counter_update_userpage(counter); } mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; @@ -572,6 +574,8 @@ static void power_perf_disable(struct perf_counter *counter) ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; + if (counter->user_page) + perf_counter_update_userpage(counter); break; } } @@ -698,6 +702,8 @@ static void record_and_restart(struct perf_counter *counter, long val, write_pmc(counter->hw.idx, val); atomic64_set(&counter->hw.prev_count, val); atomic64_set(&counter->hw.period_left, left); + if (counter->user_page) + perf_counter_update_userpage(counter); /* * Finally record data if requested. diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 18dc17d..40b324e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -143,6 +143,17 @@ struct perf_counter_hw_event { #define PERF_COUNTER_IOC_ENABLE _IO('$', 0) #define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_counter_mmap_page { + __u32 version; /* version number of this structure */ + __u32 compat_version; /* lowest version this is compat with */ + __u32 lock; /* seqlock for synchronization */ + __u32 index; /* hardware counter identifier */ + __s64 offset; /* add to hardware counter value */ +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -278,6 +289,9 @@ struct perf_counter { int oncpu; int cpu; + /* pointer to page shared with userspace via mmap */ + unsigned long user_page; + /* read() / irq related data */ wait_queue_head_t waitq; /* optional: for NMIs */ @@ -361,6 +375,7 @@ extern int perf_counter_task_enable(void); extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_cpu_context *cpuctx, struct perf_counter_context *ctx, int cpu); +extern void perf_counter_update_userpage(struct perf_counter *counter); extern void perf_counter_output(struct perf_counter *counter, int nmi, struct pt_regs *regs); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index ce34bff..d9cfd90 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1177,6 +1177,7 @@ static int perf_release(struct inode *inode, struct file *file) mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); + free_page(counter->user_page); free_counter(counter); put_context(ctx); @@ -1346,12 +1347,87 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } +void perf_counter_update_userpage(struct perf_counter *counter) +{ + struct perf_counter_mmap_page *userpg; + + if (!counter->user_page) + return; + userpg = (struct perf_counter_mmap_page *) counter->user_page; + + ++userpg->lock; + smp_wmb(); + userpg->index = counter->hw.idx; + userpg->offset = atomic64_read(&counter->count); + if (counter->state == PERF_COUNTER_STATE_ACTIVE) + userpg->offset -= atomic64_read(&counter->hw.prev_count); + smp_wmb(); + ++userpg->lock; +} + +static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct perf_counter *counter = vma->vm_file->private_data; + + if (!counter->user_page) + return VM_FAULT_SIGBUS; + + vmf->page = virt_to_page(counter->user_page); + get_page(vmf->page); + return 0; +} + +static struct vm_operations_struct perf_mmap_vmops = { + .fault = perf_mmap_fault, +}; + +static int perf_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct perf_counter *counter = file->private_data; + unsigned long userpg; + + if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) + return -EINVAL; + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + /* + * For now, restrict to the case of a hardware counter + * on the current task. + */ + if (is_software_counter(counter) || counter->task != current) + return -EINVAL; + + userpg = counter->user_page; + if (!userpg) { + userpg = get_zeroed_page(GFP_KERNEL); + mutex_lock(&counter->mutex); + if (counter->user_page) { + free_page(userpg); + userpg = counter->user_page; + } else { + counter->user_page = userpg; + } + mutex_unlock(&counter->mutex); + if (!userpg) + return -ENOMEM; + } + + perf_counter_update_userpage(counter); + + vma->vm_flags &= ~VM_MAYWRITE; + vma->vm_flags |= VM_RESERVED; + vma->vm_ops = &perf_mmap_vmops; + return 0; +} + static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, .poll = perf_poll, .unlocked_ioctl = perf_ioctl, .compat_ioctl = perf_ioctl, + .mmap = perf_mmap, }; /* -- cgit v0.10.2 From 9ab772cd535c4b256a577eae516f9c7462346b2d Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 23 Mar 2009 18:22:09 +0100 Subject: mutex: add atomic_dec_and_mutex_lock() Much like the atomic_dec_and_lock() function in which we take an hold a spin_lock if we drop the atomic to 0 this function takes and holds the mutex if we dec the atomic to 0. Signed-off-by: Eric Paris Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.410913479@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7..93054fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} + #endif -- cgit v0.10.2 From b09d2501ed3d294619cbfbcf828ad39324d0e548 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 1 Apr 2009 17:21:56 -0700 Subject: mutex: drop "inline" from mutex_lock() inside kernel/mutex.c Impact: build fix mutex_lock() is was defined inline in kernel/mutex.c, but wasn't declared so not in . This didn't cause a problem until checkin 3a2d367d9aabac486ac4444c6c7ec7a1dab16267 added the atomic_dec_and_mutex_lock() inline in between declaration and definion. This broke building with CONFIG_ALLOW_WARNINGS=n, e.g. make allnoconfig. Either from the source code nor the allnoconfig binary output I cannot find any internal references to mutex_lock() in kernel/mutex.c, so presumably this "inline" is now-useless legacy. Cc: Eric Paris Cc: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: Signed-off-by: H. Peter Anvin diff --git a/kernel/mutex.c b/kernel/mutex.c index 5d79781..fd95eaa 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count); * * This function is similar to (but not equivalent to) down(). */ -void inline __sched mutex_lock(struct mutex *lock) +void __sched mutex_lock(struct mutex *lock) { might_sleep(); /* -- cgit v0.10.2 From 7b732a75047738e4f85438ed2f9cd34bf5f2a19a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:10 +0100 Subject: perf_counter: new output ABI - part 1 Impact: Rework the perfcounter output ABI use sys_read() only for instant data and provide mmap() output for all async overflow data. The first mmap() determines the size of the output buffer. The mmap() size must be a PAGE_SIZE multiple of 1+pages, where pages must be a power of 2 or 0. Further mmap()s of the same fd must have the same size. Once all maps are gone, you can again mmap() with a new size. In case of 0 extra pages there is no data output and the first page only contains meta data. When there are data pages, a poll() event will be generated for each full page of data. Furthermore, the output is circular. This means that although 1 page is a valid configuration, its useless, since we'll start overwriting it the instant we report a full page. Future work will focus on the output format (currently maintained) where we'll likey want each entry denoted by a header which includes a type and length. Further future work will allow to splice() the fd, also containing the async overflow data -- splice() would be mutually exclusive with mmap() of the data. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.470536358@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index e434928..d48596a 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -417,8 +417,7 @@ void hw_perf_restore(u64 disable) atomic64_set(&counter->hw.prev_count, val); counter->hw.idx = hwc_index[i] + 1; write_pmc(counter->hw.idx, val); - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); } mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; @@ -574,8 +573,7 @@ static void power_perf_disable(struct perf_counter *counter) ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); break; } } @@ -702,8 +700,7 @@ static void record_and_restart(struct perf_counter *counter, long val, write_pmc(counter->hw.idx, val); atomic64_set(&counter->hw.prev_count, val); atomic64_set(&counter->hw.period_left, left); - if (counter->user_page) - perf_counter_update_userpage(counter); + perf_counter_update_userpage(counter); /* * Finally record data if requested. diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 40b324e..2b5e66d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -152,6 +152,8 @@ struct perf_counter_mmap_page { __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + + __u32 data_head; /* head in the data section */ }; #ifdef __KERNEL__ @@ -218,21 +220,6 @@ struct hw_perf_counter { #endif }; -/* - * Hardcoded buffer length limit for now, for IRQ-fed events: - */ -#define PERF_DATA_BUFLEN 2048 - -/** - * struct perf_data - performance counter IRQ data sampling ... - */ -struct perf_data { - int len; - int rd_idx; - int overrun; - u8 data[PERF_DATA_BUFLEN]; -}; - struct perf_counter; /** @@ -256,6 +243,14 @@ enum perf_counter_active_state { struct file; +struct perf_mmap_data { + struct rcu_head rcu_head; + int nr_pages; + atomic_t head; + struct perf_counter_mmap_page *user_page; + void *data_pages[0]; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -289,16 +284,15 @@ struct perf_counter { int oncpu; int cpu; - /* pointer to page shared with userspace via mmap */ - unsigned long user_page; + /* mmap bits */ + struct mutex mmap_mutex; + atomic_t mmap_count; + struct perf_mmap_data *data; - /* read() / irq related data */ + /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ int wakeup_pending; - struct perf_data *irqdata; - struct perf_data *usrdata; - struct perf_data data[2]; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d9cfd90..0dfe910 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -4,7 +4,8 @@ * Copyright(C) 2008 Thomas Gleixner * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar * - * For licencing details see kernel-base/COPYING + * + * For licensing details see kernel-base/COPYING */ #include @@ -1022,66 +1023,6 @@ static u64 perf_counter_read(struct perf_counter *counter) return atomic64_read(&counter->count); } -/* - * Cross CPU call to switch performance data pointers - */ -static void __perf_switch_irq_data(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - struct perf_data *oldirqdata = counter->irqdata; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - */ - if (ctx->task) { - if (cpuctx->task_ctx != ctx) - return; - spin_lock(&ctx->lock); - } - - /* Change the pointer NMI safe */ - atomic_long_set((atomic_long_t *)&counter->irqdata, - (unsigned long) counter->usrdata); - counter->usrdata = oldirqdata; - - if (ctx->task) - spin_unlock(&ctx->lock); -} - -static struct perf_data *perf_switch_irq_data(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - struct perf_data *oldirqdata = counter->irqdata; - struct task_struct *task = ctx->task; - - if (!task) { - smp_call_function_single(counter->cpu, - __perf_switch_irq_data, - counter, 1); - return counter->usrdata; - } - -retry: - spin_lock_irq(&ctx->lock); - if (counter->state != PERF_COUNTER_STATE_ACTIVE) { - counter->irqdata = counter->usrdata; - counter->usrdata = oldirqdata; - spin_unlock_irq(&ctx->lock); - return oldirqdata; - } - spin_unlock_irq(&ctx->lock); - task_oncpu_function_call(task, __perf_switch_irq_data, counter); - /* Might have failed, because task was scheduled out */ - if (counter->irqdata == oldirqdata) - goto retry; - - return counter->usrdata; -} - static void put_context(struct perf_counter_context *ctx) { if (ctx->task) @@ -1177,7 +1118,6 @@ static int perf_release(struct inode *inode, struct file *file) mutex_unlock(&counter->mutex); mutex_unlock(&ctx->mutex); - free_page(counter->user_page); free_counter(counter); put_context(ctx); @@ -1192,7 +1132,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { u64 cntval; - if (count != sizeof(cntval)) + if (count < sizeof(cntval)) return -EINVAL; /* @@ -1211,121 +1151,20 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) } static ssize_t -perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count) -{ - if (!usrdata->len) - return 0; - - count = min(count, (size_t)usrdata->len); - if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count)) - return -EFAULT; - - /* Adjust the counters */ - usrdata->len -= count; - if (!usrdata->len) - usrdata->rd_idx = 0; - else - usrdata->rd_idx += count; - - return count; -} - -static ssize_t -perf_read_irq_data(struct perf_counter *counter, - char __user *buf, - size_t count, - int nonblocking) -{ - struct perf_data *irqdata, *usrdata; - DECLARE_WAITQUEUE(wait, current); - ssize_t res, res2; - - irqdata = counter->irqdata; - usrdata = counter->usrdata; - - if (usrdata->len + irqdata->len >= count) - goto read_pending; - - if (nonblocking) - return -EAGAIN; - - spin_lock_irq(&counter->waitq.lock); - __add_wait_queue(&counter->waitq, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (usrdata->len + irqdata->len >= count) - break; - - if (signal_pending(current)) - break; - - if (counter->state == PERF_COUNTER_STATE_ERROR) - break; - - spin_unlock_irq(&counter->waitq.lock); - schedule(); - spin_lock_irq(&counter->waitq.lock); - } - __remove_wait_queue(&counter->waitq, &wait); - __set_current_state(TASK_RUNNING); - spin_unlock_irq(&counter->waitq.lock); - - if (usrdata->len + irqdata->len < count && - counter->state != PERF_COUNTER_STATE_ERROR) - return -ERESTARTSYS; -read_pending: - mutex_lock(&counter->mutex); - - /* Drain pending data first: */ - res = perf_copy_usrdata(usrdata, buf, count); - if (res < 0 || res == count) - goto out; - - /* Switch irq buffer: */ - usrdata = perf_switch_irq_data(counter); - res2 = perf_copy_usrdata(usrdata, buf + res, count - res); - if (res2 < 0) { - if (!res) - res = -EFAULT; - } else { - res += res2; - } -out: - mutex_unlock(&counter->mutex); - - return res; -} - -static ssize_t perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct perf_counter *counter = file->private_data; - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - return perf_read_hw(counter, buf, count); - - case PERF_RECORD_IRQ: - case PERF_RECORD_GROUP: - return perf_read_irq_data(counter, buf, count, - file->f_flags & O_NONBLOCK); - } - return -EINVAL; + return perf_read_hw(counter, buf, count); } static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_counter *counter = file->private_data; - unsigned int events = 0; - unsigned long flags; + unsigned int events = POLLIN; poll_wait(file, &counter->waitq, wait); - spin_lock_irqsave(&counter->waitq.lock, flags); - if (counter->usrdata->len || counter->irqdata->len) - events |= POLLIN; - spin_unlock_irqrestore(&counter->waitq.lock, flags); - return events; } @@ -1347,78 +1186,207 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } -void perf_counter_update_userpage(struct perf_counter *counter) +static void __perf_counter_update_userpage(struct perf_counter *counter, + struct perf_mmap_data *data) { - struct perf_counter_mmap_page *userpg; - - if (!counter->user_page) - return; - userpg = (struct perf_counter_mmap_page *) counter->user_page; + struct perf_counter_mmap_page *userpg = data->user_page; + /* + * Disable preemption so as to not let the corresponding user-space + * spin too long if we get preempted. + */ + preempt_disable(); ++userpg->lock; smp_wmb(); userpg->index = counter->hw.idx; userpg->offset = atomic64_read(&counter->count); if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); + + userpg->data_head = atomic_read(&data->head); smp_wmb(); ++userpg->lock; + preempt_enable(); +} + +void perf_counter_update_userpage(struct perf_counter *counter) +{ + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) + __perf_counter_update_userpage(counter, data); + rcu_read_unlock(); } static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct perf_counter *counter = vma->vm_file->private_data; + struct perf_mmap_data *data; + int ret = VM_FAULT_SIGBUS; - if (!counter->user_page) - return VM_FAULT_SIGBUS; + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (!data) + goto unlock; + + if (vmf->pgoff == 0) { + vmf->page = virt_to_page(data->user_page); + } else { + int nr = vmf->pgoff - 1; - vmf->page = virt_to_page(counter->user_page); + if ((unsigned)nr > data->nr_pages) + goto unlock; + + vmf->page = virt_to_page(data->data_pages[nr]); + } get_page(vmf->page); + ret = 0; +unlock: + rcu_read_unlock(); + + return ret; +} + +static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) +{ + struct perf_mmap_data *data; + unsigned long size; + int i; + + WARN_ON(atomic_read(&counter->mmap_count)); + + size = sizeof(struct perf_mmap_data); + size += nr_pages * sizeof(void *); + + data = kzalloc(size, GFP_KERNEL); + if (!data) + goto fail; + + data->user_page = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->user_page) + goto fail_user_page; + + for (i = 0; i < nr_pages; i++) { + data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); + if (!data->data_pages[i]) + goto fail_data_pages; + } + + data->nr_pages = nr_pages; + + rcu_assign_pointer(counter->data, data); + return 0; + +fail_data_pages: + for (i--; i >= 0; i--) + free_page((unsigned long)data->data_pages[i]); + + free_page((unsigned long)data->user_page); + +fail_user_page: + kfree(data); + +fail: + return -ENOMEM; +} + +static void __perf_mmap_data_free(struct rcu_head *rcu_head) +{ + struct perf_mmap_data *data = container_of(rcu_head, + struct perf_mmap_data, rcu_head); + int i; + + free_page((unsigned long)data->user_page); + for (i = 0; i < data->nr_pages; i++) + free_page((unsigned long)data->data_pages[i]); + kfree(data); +} + +static void perf_mmap_data_free(struct perf_counter *counter) +{ + struct perf_mmap_data *data = counter->data; + + WARN_ON(atomic_read(&counter->mmap_count)); + + rcu_assign_pointer(counter->data, NULL); + call_rcu(&data->rcu_head, __perf_mmap_data_free); +} + +static void perf_mmap_open(struct vm_area_struct *vma) +{ + struct perf_counter *counter = vma->vm_file->private_data; + + atomic_inc(&counter->mmap_count); +} + +static void perf_mmap_close(struct vm_area_struct *vma) +{ + struct perf_counter *counter = vma->vm_file->private_data; + + if (atomic_dec_and_mutex_lock(&counter->mmap_count, + &counter->mmap_mutex)) { + perf_mmap_data_free(counter); + mutex_unlock(&counter->mmap_mutex); + } } static struct vm_operations_struct perf_mmap_vmops = { + .open = perf_mmap_open, + .close = perf_mmap_close, .fault = perf_mmap_fault, }; static int perf_mmap(struct file *file, struct vm_area_struct *vma) { struct perf_counter *counter = file->private_data; - unsigned long userpg; + unsigned long vma_size; + unsigned long nr_pages; + unsigned long locked, lock_limit; + int ret = 0; if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) return -EINVAL; - if (vma->vm_end - vma->vm_start != PAGE_SIZE) + + vma_size = vma->vm_end - vma->vm_start; + nr_pages = (vma_size / PAGE_SIZE) - 1; + + if (nr_pages == 0 || !is_power_of_2(nr_pages)) return -EINVAL; - /* - * For now, restrict to the case of a hardware counter - * on the current task. - */ - if (is_software_counter(counter) || counter->task != current) + if (vma_size != PAGE_SIZE * (1 + nr_pages)) return -EINVAL; - userpg = counter->user_page; - if (!userpg) { - userpg = get_zeroed_page(GFP_KERNEL); - mutex_lock(&counter->mutex); - if (counter->user_page) { - free_page(userpg); - userpg = counter->user_page; - } else { - counter->user_page = userpg; - } - mutex_unlock(&counter->mutex); - if (!userpg) - return -ENOMEM; - } + if (vma->vm_pgoff != 0) + return -EINVAL; + + locked = vma_size >> PAGE_SHIFT; + locked += vma->vm_mm->locked_vm; - perf_counter_update_userpage(counter); + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; + lock_limit >>= PAGE_SHIFT; + + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) + return -EPERM; + + mutex_lock(&counter->mmap_mutex); + if (atomic_inc_not_zero(&counter->mmap_count)) + goto out; + + WARN_ON(counter->data); + ret = perf_mmap_data_alloc(counter, nr_pages); + if (!ret) + atomic_set(&counter->mmap_count, 1); +out: + mutex_unlock(&counter->mmap_mutex); vma->vm_flags &= ~VM_MAYWRITE; vma->vm_flags |= VM_RESERVED; vma->vm_ops = &perf_mmap_vmops; - return 0; + + return ret; } static const struct file_operations perf_fops = { @@ -1434,30 +1402,94 @@ static const struct file_operations perf_fops = { * Output */ -static void perf_counter_store_irq(struct perf_counter *counter, u64 data) +static int perf_output_write(struct perf_counter *counter, int nmi, + void *buf, ssize_t size) { - struct perf_data *irqdata = counter->irqdata; + struct perf_mmap_data *data; + unsigned int offset, head, nr; + unsigned int len; + int ret, wakeup; - if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) { - irqdata->overrun++; - } else { - u64 *p = (u64 *) &irqdata->data[irqdata->len]; + rcu_read_lock(); + ret = -ENOSPC; + data = rcu_dereference(counter->data); + if (!data) + goto out; + + if (!data->nr_pages) + goto out; + + ret = -EINVAL; + if (size > PAGE_SIZE) + goto out; + + do { + offset = head = atomic_read(&data->head); + head += sizeof(u64); + } while (atomic_cmpxchg(&data->head, offset, head) != offset); + + wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); - *p = data; - irqdata->len += sizeof(u64); + nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1); + offset &= PAGE_SIZE - 1; + + len = min_t(unsigned int, PAGE_SIZE - offset, size); + memcpy(data->data_pages[nr] + offset, buf, len); + size -= len; + + if (size) { + nr = (nr + 1) & (data->nr_pages - 1); + memcpy(data->data_pages[nr], buf + len, size); + } + + /* + * generate a poll() wakeup for every page boundary crossed + */ + if (wakeup) { + __perf_counter_update_userpage(counter, data); + if (nmi) { + counter->wakeup_pending = 1; + set_perf_counter_pending(); + } else + wake_up(&counter->waitq); } + ret = 0; +out: + rcu_read_unlock(); + + return ret; } -static void perf_counter_handle_group(struct perf_counter *counter) +static void perf_output_simple(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + u64 entry; + + entry = instruction_pointer(regs); + + perf_output_write(counter, nmi, &entry, sizeof(entry)); +} + +struct group_entry { + u64 event; + u64 counter; +}; + +static void perf_output_group(struct perf_counter *counter, int nmi) { struct perf_counter *leader, *sub; leader = counter->group_leader; list_for_each_entry(sub, &leader->sibling_list, list_entry) { + struct group_entry entry; + if (sub != counter) sub->hw_ops->read(sub); - perf_counter_store_irq(counter, sub->hw_event.config); - perf_counter_store_irq(counter, atomic64_read(&sub->count)); + + entry.event = sub->hw_event.config; + entry.counter = atomic64_read(&sub->count); + + perf_output_write(counter, nmi, &entry, sizeof(entry)); } } @@ -1469,19 +1501,13 @@ void perf_counter_output(struct perf_counter *counter, return; case PERF_RECORD_IRQ: - perf_counter_store_irq(counter, instruction_pointer(regs)); + perf_output_simple(counter, nmi, regs); break; case PERF_RECORD_GROUP: - perf_counter_handle_group(counter); + perf_output_group(counter, nmi); break; } - - if (nmi) { - counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&counter->waitq); } /* @@ -1967,10 +1993,10 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, INIT_LIST_HEAD(&counter->sibling_list); init_waitqueue_head(&counter->waitq); + mutex_init(&counter->mmap_mutex); + INIT_LIST_HEAD(&counter->child_list); - counter->irqdata = &counter->data[0]; - counter->usrdata = &counter->data[1]; counter->cpu = cpu; counter->hw_event = *hw_event; counter->wakeup_pending = 0; -- cgit v0.10.2 From 803d4f3980f6e220b27311a283aab0a4d68b6709 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:11 +0100 Subject: perf_counter tools: update to new syscall ABI update the kerneltop userspace to work with the latest syscall ABI Signed-off-by: Peter Zijlstra Cc: Wu Fengguang Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.559643732@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 81a68aa..a72c9bd 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -87,20 +87,90 @@ #include -#include "perfcounters.h" +#include "include/linux/perf_counter.h" +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +typedef unsigned int __u32; +typedef unsigned long long __u64; +typedef long long __s64; + + +#ifdef __x86_64__ +# define __NR_perf_counter_open 295 +#endif + +#ifdef __i386__ +# define __NR_perf_counter_open 333 +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#endif + +asmlinkage int sys_perf_counter_open( + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ + int ret; + + ret = syscall( + __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); +#if defined(__x86_64__) || defined(__i386__) + if (ret < 0 && ret > -4096) { + errno = -ret; + ret = -1; + } +#endif + return ret; +} + #define MAX_COUNTERS 64 #define MAX_NR_CPUS 256 -#define DEF_PERFSTAT_EVENTS { -2, -5, -4, -3, 0, 1, 2, 3} +#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) static int run_perfstat = 0; static int system_wide = 0; static int nr_counters = 0; -static __s64 event_id[MAX_COUNTERS] = DEF_PERFSTAT_EVENTS; -static int event_raw[MAX_COUNTERS]; +static __u64 event_id[MAX_COUNTERS] = { + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), + + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), +}; +static int default_interval = 100000; static int event_count[MAX_COUNTERS]; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; @@ -156,49 +226,63 @@ static char *sw_event_names[] = { "pagefaults", "context switches", "CPU migrations", + "minor faults", + "major faults", }; struct event_symbol { - int event; + __u64 event; char *symbol; }; static struct event_symbol event_symbols[] = { - {PERF_COUNT_CPU_CYCLES, "cpu-cycles", }, - {PERF_COUNT_CPU_CYCLES, "cycles", }, - {PERF_COUNT_INSTRUCTIONS, "instructions", }, - {PERF_COUNT_CACHE_REFERENCES, "cache-references", }, - {PERF_COUNT_CACHE_MISSES, "cache-misses", }, - {PERF_COUNT_BRANCH_INSTRUCTIONS, "branch-instructions", }, - {PERF_COUNT_BRANCH_INSTRUCTIONS, "branches", }, - {PERF_COUNT_BRANCH_MISSES, "branch-misses", }, - {PERF_COUNT_BUS_CYCLES, "bus-cycles", }, - {PERF_COUNT_CPU_CLOCK, "cpu-ticks", }, - {PERF_COUNT_CPU_CLOCK, "ticks", }, - {PERF_COUNT_TASK_CLOCK, "task-ticks", }, - {PERF_COUNT_PAGE_FAULTS, "page-faults", }, - {PERF_COUNT_PAGE_FAULTS, "faults", }, - {PERF_COUNT_CONTEXT_SWITCHES, "context-switches", }, - {PERF_COUNT_CONTEXT_SWITCHES, "cs", }, - {PERF_COUNT_CPU_MIGRATIONS, "cpu-migrations", }, - {PERF_COUNT_CPU_MIGRATIONS, "migrations", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, + + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, }; +#define __PERF_COUNTER_FIELD(config, name) \ + ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) +#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) +#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) +#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) + static void display_events_help(void) { unsigned int i; - int e; + __u64 e; printf( " -e EVENT --event=EVENT # symbolic-name abbreviations"); - for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) { - if (e != event_symbols[i].event) { - e = event_symbols[i].event; - printf( - "\n %2d: %-20s", e, event_symbols[i].symbol); - } else - printf(" %s", event_symbols[i].symbol); + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + int type, id; + + e = event_symbols[i].event; + type = PERF_COUNTER_TYPE(e); + id = PERF_COUNTER_ID(e); + + printf("\n %d:%d: %-20s", + type, id, event_symbols[i].symbol); } printf("\n" @@ -249,44 +333,51 @@ static void display_help(void) exit(0); } -static int type_valid(int type) -{ - if (type >= PERF_HW_EVENTS_MAX) - return 0; - if (type <= PERF_SW_EVENTS_MIN) - return 0; - - return 1; -} - static char *event_name(int ctr) { - __s64 type = event_id[ctr]; + __u64 config = event_id[ctr]; + int type = PERF_COUNTER_TYPE(config); + int id = PERF_COUNTER_ID(config); static char buf[32]; - if (event_raw[ctr]) { - sprintf(buf, "raw 0x%llx", (long long)type); + if (PERF_COUNTER_RAW(config)) { + sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); return buf; } - if (!type_valid(type)) - return "unknown"; - if (type >= 0) - return hw_event_names[type]; + switch (type) { + case PERF_TYPE_HARDWARE: + if (id < PERF_HW_EVENTS_MAX) + return hw_event_names[id]; + return "unknown-hardware"; + + case PERF_TYPE_SOFTWARE: + if (id < PERF_SW_EVENTS_MAX) + return sw_event_names[id]; + return "unknown-software"; - return sw_event_names[-type-1]; + default: + break; + } + + return "unknown"; } /* * Each event can have multiple symbolic names. * Symbolic names are (almost) exactly matched. */ -static int match_event_symbols(char *str) +static __u64 match_event_symbols(char *str) { + __u64 config, id; + int type; unsigned int i; - if (isdigit(str[0]) || str[0] == '-') - return atoi(str); + if (sscanf(str, "r%llx", &config) == 1) + return config | PERF_COUNTER_RAW_MASK; + + if (sscanf(str, "%d:%llu", &type, &id) == 2) + return EID(type, id); for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { if (!strncmp(str, event_symbols[i].symbol, @@ -294,31 +385,22 @@ static int match_event_symbols(char *str) return event_symbols[i].event; } - return PERF_HW_EVENTS_MAX; + return ~0ULL; } static int parse_events(char *str) { - __s64 type; - int raw; + __u64 config; again: if (nr_counters == MAX_COUNTERS) return -1; - raw = 0; - if (*str == 'r') { - raw = 1; - ++str; - type = strtol(str, NULL, 16); - } else { - type = match_event_symbols(str); - if (!type_valid(type)) - return -1; - } + config = match_event_symbols(str); + if (config == ~0ULL) + return -1; - event_id[nr_counters] = type; - event_raw[nr_counters] = raw; + event_id[nr_counters] = config; nr_counters++; str = strstr(str, ","); @@ -342,8 +424,7 @@ static void create_perfstat_counter(int counter) struct perf_counter_hw_event hw_event; memset(&hw_event, 0, sizeof(hw_event)); - hw_event.type = event_id[counter]; - hw_event.raw = event_raw[counter]; + hw_event.config = event_id[counter]; hw_event.record_type = PERF_RECORD_SIMPLE; hw_event.nmi = 0; @@ -428,7 +509,7 @@ int do_perfstat(int argc, char *argv[]) count += single_count; } - if (!event_raw[counter] && + if (!PERF_COUNTER_RAW(event_id[counter]) && (event_id[counter] == PERF_COUNT_CPU_CLOCK || event_id[counter] == PERF_COUNT_TASK_CLOCK)) { @@ -911,7 +992,7 @@ static void record_ip(uint64_t ip, int counter) assert(left <= middle && middle <= right); if (!(left <= ip && ip <= right)) { printf(" left: %016lx\n", left); - printf(" ip: %016lx\n", ip); + printf(" ip: %016llx\n", ip); printf("right: %016lx\n", right); } assert(left <= ip && ip <= right); @@ -983,7 +1064,7 @@ static void process_options(int argc, char *argv[]) switch (c) { case 'a': system_wide = 1; break; - case 'c': event_count[nr_counters] = atoi(optarg); break; + case 'c': default_interval = atoi(optarg); break; case 'C': /* CPU and PID are mutually exclusive */ if (tid != -1) { @@ -1032,10 +1113,7 @@ static void process_options(int argc, char *argv[]) if (event_count[counter]) continue; - if (event_id[counter] < PERF_HW_EVENTS_MAX) - event_count[counter] = default_count[event_id[counter]]; - else - event_count[counter] = 100000; + event_count[counter] = default_interval; } } @@ -1070,12 +1148,13 @@ int main(int argc, char *argv[]) cpu = i; memset(&hw_event, 0, sizeof(hw_event)); - hw_event.type = event_id[counter]; - hw_event.raw = event_raw[counter]; + hw_event.config = event_id[counter]; hw_event.irq_period = event_count[counter]; hw_event.record_type = PERF_RECORD_IRQ; hw_event.nmi = nmi; + printf("FOO: %d %llx %llx\n", counter, event_id[counter], event_count[counter]); + fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); if (fd[i][counter] < 0) { -- cgit v0.10.2 From bcbcb37cdb67d8100acfa66df40c4d636c28c4d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Mar 2009 18:22:12 +0100 Subject: perf_counter tools: use mmap() output update kerneltop to use the mmap() output to gather overflow information Signed-off-by: Peter Zijlstra Cc: Wu Fengguang Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.677932499@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index a72c9bd..80b7905 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -84,6 +84,7 @@ #include #include #include +#include #include @@ -119,17 +120,25 @@ typedef long long __s64; #ifdef __x86_64__ -# define __NR_perf_counter_open 295 +#define __NR_perf_counter_open 295 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); #endif #ifdef __i386__ -# define __NR_perf_counter_open 333 +#define __NR_perf_counter_open 333 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); #endif #ifdef __powerpc__ #define __NR_perf_counter_open 319 +#define rmb() asm volatile ("sync" ::: "memory") +#define cpu_relax() asm volatile ("" ::: "memory"); #endif +#define unlikely(x) __builtin_expect(!!(x), 0) + asmlinkage int sys_perf_counter_open( struct perf_counter_hw_event *hw_event_uptr __user, pid_t pid, @@ -181,6 +190,7 @@ static int profile_cpu = -1; static int nr_cpus = 0; static int nmi = 1; static int group = 0; +static unsigned int page_size; static char *vmlinux; @@ -1117,16 +1127,68 @@ static void process_options(int argc, char *argv[]) } } +struct mmap_data { + int counter; + void *base; + unsigned int mask; + unsigned int prev; +}; + +static unsigned int mmap_read_head(struct mmap_data *md) +{ + struct perf_counter_mmap_page *pc = md->base; + unsigned int seq, head; + +repeat: + rmb(); + seq = pc->lock; + + if (unlikely(seq & 1)) { + cpu_relax(); + goto repeat; + } + + head = pc->data_head; + + rmb(); + if (pc->lock != seq) + goto repeat; + + return head; +} + +static void mmap_read(struct mmap_data *md) +{ + unsigned int head = mmap_read_head(md); + unsigned int old = md->prev; + unsigned char *data = md->base + page_size; + + if (head - old > md->mask) { + printf("ERROR: failed to keep up with mmap data\n"); + exit(-1); + } + + for (; old != head;) { + __u64 *ptr = (__u64 *)&data[old & md->mask]; + old += sizeof(__u64); + + process_event(*ptr, md->counter); + } + + md->prev = old; +} + int main(int argc, char *argv[]) { struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS]; + struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; struct perf_counter_hw_event hw_event; int i, counter, group_fd; unsigned int cpu; - uint64_t ip; - ssize_t res; int ret; + page_size = sysconf(_SC_PAGE_SIZE); + process_options(argc, argv); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); @@ -1153,8 +1215,6 @@ int main(int argc, char *argv[]) hw_event.record_type = PERF_RECORD_IRQ; hw_event.nmi = nmi; - printf("FOO: %d %llx %llx\n", counter, event_id[counter], event_count[counter]); - fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); if (fd[i][counter] < 0) { @@ -1174,6 +1234,17 @@ int main(int argc, char *argv[]) event_array[i][counter].fd = fd[i][counter]; event_array[i][counter].events = POLLIN; + + mmap_array[i][counter].counter = counter; + mmap_array[i][counter].prev = 0; + mmap_array[i][counter].mask = 2*page_size - 1; + mmap_array[i][counter].base = mmap(NULL, 3*page_size, + PROT_READ, MAP_SHARED, fd[i][counter], 0); + if (mmap_array[i][counter].base == MAP_FAILED) { + printf("kerneltop error: failed to mmap with %d (%s)\n", + errno, strerror(errno)); + exit(-1); + } } } @@ -1188,14 +1259,8 @@ int main(int argc, char *argv[]) int hits = events; for (i = 0; i < nr_cpus; i++) { - for (counter = 0; counter < nr_counters; counter++) { - res = read(fd[i][counter], (char *) &ip, sizeof(ip)); - if (res > 0) { - assert(res == sizeof(ip)); - - process_event(ip, counter); - } - } + for (counter = 0; counter < nr_counters; counter++) + mmap_read(&mmap_array[i][counter]); } if (time(NULL) >= last_refresh + delay_secs) { -- cgit v0.10.2 From 383c5f8cd7d253be0017d8d5a39cbb78aaf70206 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Mar 2009 21:49:25 +0100 Subject: perf_counter tools: tidy up in-kernel dependencies Remove now unified perfstat.c and perf_counter.h, and link to the in-kernel perf_counter.h. Cc: Wu Fengguang Cc: Paul Mackerras Acked-by: Peter Zijlstra Orig-LKML-Reference: <20090323172417.677932499@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index b457497..666da95 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -2,7 +2,7 @@ BINS = kerneltop perfstat all: $(BINS) -kerneltop: kerneltop.c perfcounters.h +kerneltop: kerneltop.c ../../include/linux/perf_counter.h cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o $@ $< perfstat: kerneltop diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 80b7905..25e80bc 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -88,7 +88,7 @@ #include -#include "include/linux/perf_counter.h" +#include "../../include/linux/perf_counter.h" /* diff --git a/Documentation/perf_counter/perfcounters.h b/Documentation/perf_counter/perfcounters.h deleted file mode 100644 index 32e24b9..0000000 --- a/Documentation/perf_counter/perfcounters.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) - -/* - * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all - * counters in the current task. - */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define rdclock() \ -({ \ - struct timespec ts; \ - \ - clock_gettime(CLOCK_MONOTONIC, &ts); \ - ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ -}) - -/* - * Pick up some kernel type conventions: - */ -#define __user -#define asmlinkage - -typedef unsigned int __u32; -typedef unsigned long long __u64; -typedef long long __s64; - -/* - * User-space ABI bits: - */ - -/* - * Generalized performance counter event types, used by the hw_event.type - * parameter of the sys_perf_counter_open() syscall: - */ -enum hw_event_types { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_CPU_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - PERF_COUNT_BUS_CYCLES = 6, - - PERF_HW_EVENTS_MAX = 7, - - /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): - */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, - PERF_COUNT_CPU_MIGRATIONS = -5, - - PERF_SW_EVENTS_MIN = -6, -}; - -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_hw_event { - __s64 type; - - __u64 irq_period; - __u64 record_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - - __reserved_1 : 54; - - __u32 extra_config_len; - __u32 __reserved_4; - - __u64 __reserved_2; - __u64 __reserved_3; -}; - - -#ifdef __x86_64__ -# define __NR_perf_counter_open 295 -#endif - -#ifdef __i386__ -# define __NR_perf_counter_open 333 -#endif - -#ifdef __powerpc__ -#define __NR_perf_counter_open 319 -#endif - -asmlinkage int sys_perf_counter_open( - - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd, - unsigned long flags) -{ - int ret; - - ret = syscall( - __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); -#if defined(__x86_64__) || defined(__i386__) - if (ret < 0 && ret > -4096) { - errno = -ret; - ret = -1; - } -#endif - return ret; -} diff --git a/Documentation/perf_counter/perfstat.c b/Documentation/perf_counter/perfstat.c deleted file mode 100644 index fd59446..0000000 --- a/Documentation/perf_counter/perfstat.c +++ /dev/null @@ -1,251 +0,0 @@ -/* - * perfstat: /usr/bin/time -alike performance counter statistics utility - * - * It summarizes the counter events of all tasks (and child tasks), - * covering all CPUs that the command (or workload) executes on. - * It only counts the per-task events of the workload started, - * independent of how many other tasks run on those CPUs. - * - * Build with: cc -O2 -g -lrt -Wall -W -o perfstat perfstat.c - * - * Sample output: - * - - $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null - - Performance counter stats for 'ls': - - 163516953 instructions - 2295 cache-misses - 2855182 branch-misses - - * - * Copyright (C) 2008, Red Hat Inc, Ingo Molnar - * - * Released under the GPLv2 (not later). - * - * Percpu counter support by: Yanmin Zhang - * Symbolic event options by: Wu Fengguang - */ -#define _GNU_SOURCE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "perfcounters.h" - -static int nr_cpus = 0; - -static int system_wide = 0; - -static void display_help(void) -{ - unsigned int i; - int e; - - printf( - "Usage: perfstat [] \n\n" - "PerfStat Options (up to %d event types can be specified):\n\n", - MAX_COUNTERS); - printf( - " -e EVENT --event=EVENT # symbolic-name abbreviations"); - - for (i = 0, e = PERF_HW_EVENTS_MAX; i < ARRAY_SIZE(event_symbols); i++) { - if (e != event_symbols[i].event) { - e = event_symbols[i].event; - printf( - "\n %2d: %-20s", e, event_symbols[i].symbol); - } else - printf(" %s", event_symbols[i].symbol); - } - - printf("\n" - " rNNN: raw event type\n\n" - " -s # system-wide collection\n\n" - " -c --command= # command+arguments to be timed.\n" - "\n"); - exit(0); -} - -static void process_options(int argc, char *argv[]) -{ - for (;;) { - int option_index = 0; - /** Options for getopt */ - static struct option long_options[] = { - {"event", required_argument, NULL, 'e'}, - {"help", no_argument, NULL, 'h'}, - {"command", no_argument, NULL, 'c'}, - {NULL, 0, NULL, 0 } - }; - int c = getopt_long(argc, argv, "+:e:c:s", - long_options, &option_index); - if (c == -1) - break; - - switch (c) { - case 'c': - break; - case 's': - system_wide = 1; - break; - case 'e': - parse_events(optarg); - break; - default: - break; - } - } - if (optind == argc) - goto err; - - if (!nr_counters) - nr_counters = 8; - return; - -err: - display_help(); -} - -char fault_here[1000000]; - -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; - -static void create_counter(int counter) -{ - struct perf_counter_hw_event hw_event; - - memset(&hw_event, 0, sizeof(hw_event)); - hw_event.type = event_id[counter]; - hw_event.raw = event_raw[counter]; - hw_event.record_type = PERF_RECORD_SIMPLE; - hw_event.nmi = 0; - - if (system_wide) { - int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[cpu][counter], strerror(errno)); - exit(-1); - } - - } - } else { - hw_event.inherit = 1; - hw_event.disabled = 1; - - fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); - if (fd[0][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[0][counter], strerror(errno)); - exit(-1); - } - } -} - - -int main(int argc, char *argv[]) -{ - unsigned long long t0, t1; - int counter; - ssize_t res; - int status; - int pid; - - process_options(argc, argv); - - if (system_wide) { - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - } else - nr_cpus = 1; - - for (counter = 0; counter < nr_counters; counter++) - create_counter(counter); - - argc -= optind; - argv += optind; - - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); - - if ((pid = fork()) < 0) - perror("failed to fork"); - if (!pid) { - if (execvp(argv[0], argv)) { - perror(argv[0]); - exit(-1); - } - } - while (wait(&status) >= 0) - ; - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - t1 = rdclock(); - - fflush(stdout); - - fprintf(stderr, "\n"); - fprintf(stderr, " Performance counter stats for \'%s\':\n", - argv[0]); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) { - int cpu; - __u64 count, single_count; - - count = 0; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - res = read(fd[cpu][counter], - (char *) &single_count, sizeof(single_count)); - assert(res == sizeof(single_count)); - count += single_count; - } - - if (!event_raw[counter] && - (event_id[counter] == PERF_COUNT_CPU_CLOCK || - event_id[counter] == PERF_COUNT_TASK_CLOCK)) { - - double msecs = (double)count / 1000000; - - fprintf(stderr, " %14.6f %-20s (msecs)\n", - msecs, event_name(counter)); - } else { - fprintf(stderr, " %14Ld %-20s (events)\n", - count, event_name(counter)); - } - if (!counter) - fprintf(stderr, "\n"); - } - fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); - fprintf(stderr, "\n"); - - return 0; -} -- cgit v0.10.2 From 193e8df1b465f206f8a286202680702e1c153367 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Mar 2009 22:23:16 +0100 Subject: perf_counter tools: fix build warning in kerneltop.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: kerneltop.c: In function ‘record_ip’: kerneltop.c:1005: warning: format ‘%016llx’ expects type ‘long long unsigned int’, but argument 2 has type ‘uint64_t’ Cc: Wu Fengguang Cc: Paul Mackerras Acked-by: Peter Zijlstra Orig-LKML-Reference: <20090323172417.677932499@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 25e80bc..8f9a303 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -1002,7 +1002,7 @@ static void record_ip(uint64_t ip, int counter) assert(left <= middle && middle <= right); if (!(left <= ip && ip <= right)) { printf(" left: %016lx\n", left); - printf(" ip: %016llx\n", ip); + printf(" ip: %016lx\n", (unsigned long)ip); printf("right: %016lx\n", right); } assert(left <= ip && ip <= right); -- cgit v0.10.2 From 81cdbe0509542324ad7d3282ab67c2b6716df663 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Mar 2009 22:29:50 +0100 Subject: perf_counter tools: increase cpu-cycles again Commit b7368fdd7d decreased the CPU cycles interval 100-fold, but this is causig kerneltop failures on my Nehalem box: aldebaran:/home/mingo/linux/linux/Documentation/perf_counter> ./kerneltop KernelTop refresh period: 2 seconds ERROR: failed to keep up with mmap data 10,000 cycles is way too short. What we should do instead on mostly-idle systems is some sort of read/poll timeout, so that we display something every 2 seconds for sure. Cc: Wu Fengguang Cc: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 8f9a303..2ab29b5 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -212,7 +212,7 @@ struct source_line { const unsigned int default_count[] = { - 10000, + 1000000, 1000000, 10000, 10000, -- cgit v0.10.2 From cbe46555dc4de6403cd757139d42289b5f21abb9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 24 Mar 2009 16:52:34 +1100 Subject: perf_counter tools: remove glib dependency and fix bugs in kerneltop.c The glib dependency in kerneltop.c is only for a little bit of list manipulation, and I find it inconvenient. This adds a 'next' field to struct source_line, which lets us link them together into a list. The code to do the linking ourselves turns out to be no longer or more difficult than using glib. This also fixes a few other problems: - We need to #include to get PATH_MAX on powerpc. - We need to #include rather than have our own definitions of __u64 and __s64; on powerpc the installed headers define them to be unsigned long and long respectively, and if we have our own, different definition here that causes a compile error. - This takes out the x86 setting of errno from -ret in sys_perf_counter_open. My experiments on x86 indicate that the glibc syscall() does this for us already. - We had two CPU migration counters in the default set, which seems unnecessary; I changed one of them to a context switch counter. - In perfstat mode we were printing CPU cycles and instructions as milliseconds, and the cpu clock and task clock counters as events. This fixes that. - In perfstat mode we were still printing a blank line after the first counter, which was a holdover from when a task clock counter was automatically included as the first counter. This removes the blank line. - On a test machine here, parse_symbols() and parse_vmlinux() were taking long enough (almost 0.5 seconds) for the mmap buffer to overflow before we got to the first mmap_read() call, so this moves them before we open all the counters. - The error message if sys_perf_counter_open fails needs to use errno, not -fd[i][counter]. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Acked-by: Mike Galbraith Cc: Arjan van de Ven Orig-LKML-Reference: <18888.29986.340328.540512@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 666da95..194b662 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -3,7 +3,7 @@ BINS = kerneltop perfstat all: $(BINS) kerneltop: kerneltop.c ../../include/linux/perf_counter.h - cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o $@ $< + cc -O6 -Wall -lrt -o $@ $< perfstat: kerneltop ln -sf kerneltop perfstat diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 2ab29b5..ea13e4e 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -3,7 +3,7 @@ Build with: - cc -O6 -Wall -lrt `pkg-config --cflags --libs glib-2.0` -o kerneltop kerneltop.c + cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt Sample output: @@ -56,6 +56,7 @@ * Yanmin Zhang * Wu Fengguang * Mike Galbraith + * Paul Mackerras * * Released under the GPL v2. (and only v2, not any later version) */ @@ -68,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -76,8 +78,6 @@ #include #include -#include - #include #include #include @@ -87,6 +87,7 @@ #include #include +#include #include "../../include/linux/perf_counter.h" @@ -114,11 +115,6 @@ #define __user #define asmlinkage -typedef unsigned int __u32; -typedef unsigned long long __u64; -typedef long long __s64; - - #ifdef __x86_64__ #define __NR_perf_counter_open 295 #define rmb() asm volatile("lfence" ::: "memory") @@ -146,17 +142,8 @@ asmlinkage int sys_perf_counter_open( int group_fd, unsigned long flags) { - int ret; - - ret = syscall( + return syscall( __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); -#if defined(__x86_64__) || defined(__i386__) - if (ret < 0 && ret > -4096) { - errno = -ret; - ret = -1; - } -#endif - return ret; } #define MAX_COUNTERS 64 @@ -170,7 +157,7 @@ static int system_wide = 0; static int nr_counters = 0; static __u64 event_id[MAX_COUNTERS] = { EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), @@ -202,14 +189,15 @@ static int delay_secs = 2; static int zero; static int dump_symtab; -static GList *lines; - struct source_line { uint64_t EIP; unsigned long count; char *line; + struct source_line *next; }; +static struct source_line *lines; +static struct source_line **lines_tail; const unsigned int default_count[] = { 1000000, @@ -519,9 +507,8 @@ int do_perfstat(int argc, char *argv[]) count += single_count; } - if (!PERF_COUNTER_RAW(event_id[counter]) && - (event_id[counter] == PERF_COUNT_CPU_CLOCK || - event_id[counter] == PERF_COUNT_TASK_CLOCK)) { + if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || + event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { double msecs = (double)count / 1000000; @@ -531,8 +518,6 @@ int do_perfstat(int argc, char *argv[]) fprintf(stderr, " %14Ld %-20s (events)\n", count, event_name(counter)); } - if (!counter) - fprintf(stderr, "\n"); } fprintf(stderr, "\n"); fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", @@ -554,7 +539,7 @@ struct sym_entry { char *sym; unsigned long count[MAX_COUNTERS]; int skip; - GList *source; + struct source_line *source; }; #define MAX_SYMS 100000 @@ -855,6 +840,7 @@ static void parse_vmlinux(char *filename) if (!file) return; + lines_tail = &lines; while (!feof(file)) { struct source_line *src; size_t dummy = 0; @@ -873,7 +859,9 @@ static void parse_vmlinux(char *filename) if (c) *c = 0; - lines = g_list_prepend(lines, src); + src->next = NULL; + *lines_tail = src; + lines_tail = &src->next; if (strlen(src->line)>8 && src->line[8] == ':') src->EIP = strtoull(src->line, NULL, 16); @@ -881,52 +869,43 @@ static void parse_vmlinux(char *filename) src->EIP = strtoull(src->line, NULL, 16); } pclose(file); - lines = g_list_reverse(lines); } static void record_precise_ip(uint64_t ip) { struct source_line *line; - GList *item; - item = g_list_first(lines); - while (item) { - line = item->data; + for (line = lines; line; line = line->next) { if (line->EIP == ip) line->count++; if (line->EIP > ip) break; - item = g_list_next(item); } } static void lookup_sym_in_vmlinux(struct sym_entry *sym) { struct source_line *line; - GList *item; char pattern[PATH_MAX]; sprintf(pattern, "<%s>:", sym->sym); - item = g_list_first(lines); - while (item) { - line = item->data; + for (line = lines; line; line = line->next) { if (strstr(line->line, pattern)) { - sym->source = item; + sym->source = line; break; } - item = g_list_next(item); } } -void show_lines(GList *item_queue, int item_queue_count) +static void show_lines(struct source_line *line_queue, int line_queue_count) { int i; struct source_line *line; - for (i = 0; i < item_queue_count; i++) { - line = item_queue->data; + line = line_queue; + for (i = 0; i < line_queue_count; i++) { printf("%8li\t%s\n", line->count, line->line); - item_queue = g_list_next(item_queue); + line = line->next; } } @@ -935,10 +914,9 @@ void show_lines(GList *item_queue, int item_queue_count) static void show_details(struct sym_entry *sym) { struct source_line *line; - GList *item; + struct source_line *line_queue = NULL; int displayed = 0; - GList *item_queue = NULL; - int item_queue_count = 0; + int line_queue_count = 0; if (!sym->source) lookup_sym_in_vmlinux(sym); @@ -947,30 +925,29 @@ static void show_details(struct sym_entry *sym) printf("Showing details for %s\n", sym->sym); - item = sym->source; - while (item) { - line = item->data; + line = sym->source; + while (line) { if (displayed && strstr(line->line, ">:")) break; - if (!item_queue_count) - item_queue = item; - item_queue_count ++; + if (!line_queue_count) + line_queue = line; + line_queue_count ++; if (line->count >= count_filter) { - show_lines(item_queue, item_queue_count); - item_queue_count = 0; - item_queue = NULL; - } else if (item_queue_count > TRACE_COUNT) { - item_queue = g_list_next(item_queue); - item_queue_count --; + show_lines(line_queue, line_queue_count); + line_queue_count = 0; + line_queue = NULL; + } else if (line_queue_count > TRACE_COUNT) { + line_queue = line_queue->next; + line_queue_count --; } line->count = 0; displayed++; if (displayed > 300) break; - item = g_list_next(item); + line = line->next; } } @@ -1201,6 +1178,10 @@ int main(int argc, char *argv[]) if (tid != -1 || profile_cpu != -1) nr_cpus = 1; + parse_symbols(); + if (vmlinux && sym_filter_entry) + parse_vmlinux(vmlinux); + for (i = 0; i < nr_cpus; i++) { group_fd = -1; for (counter = 0; counter < nr_counters; counter++) { @@ -1216,15 +1197,16 @@ int main(int argc, char *argv[]) hw_event.nmi = nmi; fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); - fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); if (fd[i][counter] < 0) { + int err = errno; printf("kerneltop error: syscall returned with %d (%s)\n", - fd[i][counter], strerror(-fd[i][counter])); - if (fd[i][counter] == -1) + fd[i][counter], strerror(err)); + if (err == EPERM) printf("Are you root?\n"); exit(-1); } assert(fd[i][counter] >= 0); + fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); /* * First counter acts as the group leader: @@ -1248,10 +1230,6 @@ int main(int argc, char *argv[]) } } - parse_symbols(); - if (vmlinux && sym_filter_entry) - parse_vmlinux(vmlinux); - printf("KernelTop refresh period: %d seconds\n", delay_secs); last_refresh = time(NULL); -- cgit v0.10.2 From 0fd112e41cd6f6d4779cbe327c3632d087e31476 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 Mar 2009 10:50:24 +0100 Subject: perf_counter tools: remove glib dependency and fix bugs in kerneltop.c, fix poll() Paul Mackerras wrote: > I noticed the poll stuff is bogus - we have a 2D array of struct > pollfds (MAX_NR_CPUS x MAX_COUNTERS), we fill in a sub-array (with the > rest being uninitialized, since the array is on the stack) and then > pass the first nr_cpus elements to poll. Not what we really meant, I > suspect. :) Not even if we only have one counter, since it's the > counter dimension that varies fastest. This should fix the most obvious poll fubar.. not enough to fix the full problem though.. Reported-by: Paul Mackerras Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Arjan van de Ven Orig-LKML-Reference: <18888.29986.340328.540512@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index ea13e4e..7ebde7a 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -1157,10 +1157,10 @@ static void mmap_read(struct mmap_data *md) int main(int argc, char *argv[]) { - struct pollfd event_array[MAX_NR_CPUS][MAX_COUNTERS]; + struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; struct perf_counter_hw_event hw_event; - int i, counter, group_fd; + int i, counter, group_fd, nr_poll = 0; unsigned int cpu; int ret; @@ -1214,8 +1214,9 @@ int main(int argc, char *argv[]) if (group && group_fd == -1) group_fd = fd[i][counter]; - event_array[i][counter].fd = fd[i][counter]; - event_array[i][counter].events = POLLIN; + event_array[nr_poll].fd = fd[i][counter]; + event_array[nr_poll].events = POLLIN; + nr_poll++; mmap_array[i][counter].counter = counter; mmap_array[i][counter].prev = 0; @@ -1247,7 +1248,7 @@ int main(int argc, char *argv[]) } if (hits == events) - ret = poll(event_array[0], nr_cpus, 1000); + ret = poll(event_array, nr_poll, 1000); hits = events; } -- cgit v0.10.2 From f66c6b2066b44d4ab8e8ac1ee4cae543738fe2ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 23 Mar 2009 10:29:36 +1100 Subject: perf_counter: update documentation Impact: documentation fix This updates the perfcounter documentation to reflect recent changes. Signed-off-by: Paul Mackerras diff --git a/Documentation/perf_counter/design.txt b/Documentation/perf_counter/design.txt index fddd321..aaf105c 100644 --- a/Documentation/perf_counter/design.txt +++ b/Documentation/perf_counter/design.txt @@ -11,7 +11,9 @@ thus be used to profile the code that runs on that CPU. The Linux Performance Counter subsystem provides an abstraction of these hardware capabilities. It provides per task and per CPU counters, counter -groups, and it provides event capabilities on top of those. +groups, and it provides event capabilities on top of those. It +provides "virtual" 64-bit counters, regardless of the width of the +underlying hardware counters. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. @@ -20,7 +22,8 @@ The special file descriptor is opened via the perf_counter_open() system call: int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, - pid_t pid, int cpu, int group_fd); + pid_t pid, int cpu, int group_fd, + unsigned long flags); The syscall returns the new fd. The fd can be used via the normal VFS system calls: read() can be used to read the counter, fcntl() @@ -32,90 +35,180 @@ can be poll()ed. When creating a new counter fd, 'perf_counter_hw_event' is: /* - * Hardware event to monitor via a performance monitoring counter: + * Event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { - s64 type; + __u64 event_config; - u64 irq_period; - u32 record_type; + __u64 irq_period; + __u64 record_type; + __u64 read_format; - u32 disabled : 1, /* off by default */ - nmi : 1, /* NMI sampling */ - raw : 1, /* raw event type */ - __reserved_1 : 29; + __u64 disabled : 1, /* off by default */ + nmi : 1, /* NMI sampling */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ - u64 __reserved_2; + __reserved_1 : 55; + + __u32 extra_config_len; + + __u32 __reserved_4; + __u64 __reserved_2; + __u64 __reserved_3; }; +The 'event_config' field specifies what the counter should count. It +is divided into 3 bit-fields: + +raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000 +type: 7 bits (next most significant) 0x7f00_0000_0000_0000 +event_id: 56 bits (least significant) 0x00ff_0000_0000_0000 + +If 'raw_type' is 1, then the counter will count a hardware event +specified by the remaining 63 bits of event_config. The encoding is +machine-specific. + +If 'raw_type' is 0, then the 'type' field says what kind of counter +this is, with the following encoding: + +enum perf_event_types { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, +}; + +A counter of PERF_TYPE_HARDWARE will count the hardware event +specified by 'event_id': + /* - * Generalized performance counter event types, used by the hw_event.type + * Generalized performance counter event types, used by the hw_event.event_id * parameter of the sys_perf_counter_open() syscall: */ -enum hw_event_types { +enum hw_event_ids { /* * Common hardware events, generalized by the kernel: */ - PERF_COUNT_CYCLES = 0, - PERF_COUNT_INSTRUCTIONS = 1, - PERF_COUNT_CACHE_REFERENCES = 2, - PERF_COUNT_CACHE_MISSES = 3, - PERF_COUNT_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_BRANCH_MISSES = 5, - - /* - * Special "software" counters provided by the kernel, even if - * the hardware does not support performance counters. These - * counters measure various physical and sw events of the - * kernel (and allow the profiling of them as well): - */ - PERF_COUNT_CPU_CLOCK = -1, - PERF_COUNT_TASK_CLOCK = -2, - /* - * Future software events: - */ - /* PERF_COUNT_PAGE_FAULTS = -3, - PERF_COUNT_CONTEXT_SWITCHES = -4, */ + PERF_COUNT_CPU_CYCLES = 0, + PERF_COUNT_INSTRUCTIONS = 1, + PERF_COUNT_CACHE_REFERENCES = 2, + PERF_COUNT_CACHE_MISSES = 3, + PERF_COUNT_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_BRANCH_MISSES = 5, + PERF_COUNT_BUS_CYCLES = 6, }; -These are standardized types of events that work uniformly on all CPUs -that implements Performance Counters support under Linux. If a CPU is -not able to count branch-misses, then the system call will return --EINVAL. +These are standardized types of events that work relatively uniformly +on all CPUs that implement Performance Counters support under Linux, +although there may be variations (e.g., different CPUs might count +cache references and misses at different levels of the cache hierarchy). +If a CPU is not able to count the selected event, then the system call +will return -EINVAL. -More hw_event_types are supported as well, but they are CPU -specific and are enumerated via /sys on a per CPU basis. Raw hw event -types can be passed in under hw_event.type if hw_event.raw is 1. -For example, to count "External bus cycles while bus lock signal asserted" -events on Intel Core CPUs, pass in a 0x4064 event type value and set -hw_event.raw to 1. +More hw_event_types are supported as well, but they are CPU-specific +and accessed as raw events. For example, to count "External bus +cycles while bus lock signal asserted" events on Intel Core CPUs, pass +in a 0x4064 event_id value and set hw_event.raw_type to 1. -'record_type' is the type of data that a read() will provide for the -counter, and it can be one of: +A counter of type PERF_TYPE_SOFTWARE will count one of the available +software events, selected by 'event_id': /* - * IRQ-notification data record type: + * Special "software" counters provided by the kernel, even if the hardware + * does not support performance counters. These counters measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, +enum sw_event_ids { + PERF_COUNT_CPU_CLOCK = 0, + PERF_COUNT_TASK_CLOCK = 1, + PERF_COUNT_PAGE_FAULTS = 2, + PERF_COUNT_CONTEXT_SWITCHES = 3, + PERF_COUNT_CPU_MIGRATIONS = 4, + PERF_COUNT_PAGE_FAULTS_MIN = 5, + PERF_COUNT_PAGE_FAULTS_MAJ = 6, }; -a "simple" counter is one that counts hardware events and allows -them to be read out into a u64 count value. (read() returns 8 on -a successful read of a simple counter.) +Counters come in two flavours: counting counters and sampling +counters. A "counting" counter is one that is used for counting the +number of events that occur, and is characterised by having +irq_period = 0 and record_type = PERF_RECORD_SIMPLE. A read() on a +counting counter simply returns the current value of the counter as +an 8-byte number. -An "irq" counter is one that will also provide an IRQ context information: -the IP of the interrupted context. In this case read() will return -the 8-byte counter value, plus the Instruction Pointer address of the -interrupted context. +A "sampling" counter is one that is set up to generate an interrupt +every N events, where N is given by 'irq_period'. A sampling counter +has irq_period > 0 and record_type != PERF_RECORD_SIMPLE. The +record_type controls what data is recorded on each interrupt, and the +available values are currently: -The parameter 'hw_event_period' is the number of events before waking up -a read() that is blocked on a counter fd. Zero value means a non-blocking -counter. +/* + * IRQ-notification data record type: + */ +enum perf_counter_record_type { + PERF_RECORD_SIMPLE = 0, + PERF_RECORD_IRQ = 1, + PERF_RECORD_GROUP = 2, +}; -The 'pid' parameter allows the counter to be specific to a task: +A record_type value of PERF_RECORD_IRQ will record the instruction +pointer (IP) at which the interrupt occurred. A record_type value of +PERF_RECORD_GROUP will record the event_config and counter value of +all of the other counters in the group, and should only be used on a +group leader (see below). Currently these two values are mutually +exclusive, but record_type will become a bit-mask in future and +support other values. + +A sampling counter has an event queue, into which an event is placed +on each interrupt. A read() on a sampling counter will read the next +event from the event queue. If the queue is empty, the read() will +either block or return an EAGAIN error, depending on whether the fd +has been set to non-blocking mode or not. + +The 'disabled' bit specifies whether the counter starts out disabled +or enabled. If it is initially disabled, it can be enabled by ioctl +or prctl (see below). + +The 'nmi' bit specifies, for hardware events, whether the counter +should be set up to request non-maskable interrupts (NMIs) or normal +interrupts. This bit is ignored if the user doesn't have +CAP_SYS_ADMIN privilege (i.e. is not root) or if the CPU doesn't +generate NMIs from hardware counters. + +The 'inherit' bit, if set, specifies that this counter should count +events on descendant tasks as well as the task specified. This only +applies to new descendents, not to any existing descendents at the +time the counter is created (nor to any new descendents of existing +descendents). + +The 'pinned' bit, if set, specifies that the counter should always be +on the CPU if at all possible. It only applies to hardware counters +and only to group leaders. If a pinned counter cannot be put onto the +CPU (e.g. because there are not enough hardware counters or because of +a conflict with some other event), then the counter goes into an +'error' state, where reads return end-of-file (i.e. read() returns 0) +until the counter is subsequently enabled or disabled. + +The 'exclusive' bit, if set, specifies that when this counter's group +is on the CPU, it should be the only group using the CPU's counters. +In future, this will allow sophisticated monitoring programs to supply +extra configuration information via 'extra_config_len' to exploit +advanced features of the CPU's Performance Monitor Unit (PMU) that are +not otherwise accessible and that might disrupt other hardware +counters. + +The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a +way to request that counting of events be restricted to times when the +CPU is in user, kernel and/or hypervisor mode. + + +The 'pid' parameter to the perf_counter_open() system call allows the +counter to be specific to a task: pid == 0: if the pid parameter is zero, the counter is attached to the current task. @@ -125,8 +218,7 @@ The 'pid' parameter allows the counter to be specific to a task: pid < 0: all tasks are counted (per cpu counters) -The 'cpu' parameter allows a counter to be made specific to a full -CPU: +The 'cpu' parameter allows a counter to be made specific to a CPU: cpu >= 0: the counter is restricted to a specific CPU cpu == -1: the counter counts on all CPUs @@ -141,7 +233,51 @@ their own tasks. A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. -Group counters are created by passing in a group_fd of another counter. -Groups are scheduled at once and can be used with PERF_RECORD_GROUP -to record multi-dimensional timestamps. +The 'flags' parameter is currently unused and must be zero. + +The 'group_fd' parameter allows counter "groups" to be set up. A +counter group has one counter which is the group "leader". The leader +is created first, with group_fd = -1 in the perf_counter_open call +that creates it. The rest of the group members are created +subsequently, with group_fd giving the fd of the group leader. +(A single counter on its own is created with group_fd = -1 and is +considered to be a group with only 1 member.) + +A counter group is scheduled onto the CPU as a unit, that is, it will +only be put onto the CPU if all of the counters in the group can be +put onto the CPU. This means that the values of the member counters +can be meaningfully compared, added, divided (to get ratios), etc., +with each other, since they have counted events for the same set of +executed instructions. + +Counters can be enabled and disabled in two ways: via ioctl and via +prctl. When a counter is disabled, it doesn't count or generate +events but does continue to exist and maintain its count value. + +An individual counter or counter group can be enabled with + + ioctl(fd, PERF_COUNTER_IOC_ENABLE); + +or disabled with + + ioctl(fd, PERF_COUNTER_IOC_DISABLE); + +Enabling or disabling the leader of a group enables or disables the +whole group; that is, while the group leader is disabled, none of the +counters in the group will count. Enabling or disabling a member of a +group other than the leader only affects that counter - disabling an +non-leader stops that counter from counting but doesn't affect any +other counter. + +A process can enable or disable all the counter groups that are +attached to it, using prctl: + + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + +This applies to all counters on the current process, whether created +by this process or by another, and doesn't affect any counters that +this process has created on other processes. It only enables or +disables the group leaders, not any other members in the groups. -- cgit v0.10.2 From c7138f37f905bb7987b1f9f5a8ee73667db39f25 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 24 Mar 2009 13:18:16 +0100 Subject: perf_counter: fix perf_poll() Impact: fix kerneltop 100% CPU usage Only return a poll event when there's actually been one, poll_wait() doesn't actually wait for the waitq you pass it, it only enqueues you on it. Only once all FDs have been iterated and none of thm returned a poll-event will it schedule(). Also make it return POLL_HUP when there's not mmap() area to read from. Further, fix a silly bug in the write code. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Arjan van de Ven Orig-LKML-Reference: <1237897096.24918.181.camel@twins> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2b5e66d..48212c1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -246,6 +246,7 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; int nr_pages; + atomic_t wakeup; atomic_t head; struct perf_counter_mmap_page *user_page; void *data_pages[0]; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0dfe910..affe227 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1161,7 +1161,16 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_counter *counter = file->private_data; - unsigned int events = POLLIN; + struct perf_mmap_data *data; + unsigned int events; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) + events = atomic_xchg(&data->wakeup, 0); + else + events = POLL_HUP; + rcu_read_unlock(); poll_wait(file, &counter->waitq, wait); @@ -1425,7 +1434,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi, do { offset = head = atomic_read(&data->head); - head += sizeof(u64); + head += size; } while (atomic_cmpxchg(&data->head, offset, head) != offset); wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); @@ -1446,6 +1455,7 @@ static int perf_output_write(struct perf_counter *counter, int nmi, * generate a poll() wakeup for every page boundary crossed */ if (wakeup) { + atomic_xchg(&data->wakeup, POLL_IN); __perf_counter_update_userpage(counter, data); if (nmi) { counter->wakeup_pending = 1; -- cgit v0.10.2 From b9cacc7bf193df16532bfa7d7ca77fe50fc3c2e6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:22 +0100 Subject: perf_counter: more elaborate write API Provide a begin, copy, end interface to the output buffer. begin() reserves the space, copy() copies the data over, considering page boundaries, end() finalizes the event and does the wakeup. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113316.740550870@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index affe227..0422fd9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -16,15 +17,14 @@ #include #include #include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include #include @@ -1411,16 +1411,20 @@ static const struct file_operations perf_fops = { * Output */ -static int perf_output_write(struct perf_counter *counter, int nmi, - void *buf, ssize_t size) +struct perf_output_handle { + struct perf_counter *counter; + struct perf_mmap_data *data; + unsigned int offset; + int wakeup; +}; + +static int perf_output_begin(struct perf_output_handle *handle, + struct perf_counter *counter, unsigned int size) { struct perf_mmap_data *data; - unsigned int offset, head, nr; - unsigned int len; - int ret, wakeup; + unsigned int offset, head; rcu_read_lock(); - ret = -ENOSPC; data = rcu_dereference(counter->data); if (!data) goto out; @@ -1428,45 +1432,82 @@ static int perf_output_write(struct perf_counter *counter, int nmi, if (!data->nr_pages) goto out; - ret = -EINVAL; - if (size > PAGE_SIZE) - goto out; - do { offset = head = atomic_read(&data->head); head += size; } while (atomic_cmpxchg(&data->head, offset, head) != offset); - wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); + handle->counter = counter; + handle->data = data; + handle->offset = offset; + handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); - nr = (offset >> PAGE_SHIFT) & (data->nr_pages - 1); - offset &= PAGE_SIZE - 1; + return 0; - len = min_t(unsigned int, PAGE_SIZE - offset, size); - memcpy(data->data_pages[nr] + offset, buf, len); - size -= len; +out: + rcu_read_unlock(); - if (size) { - nr = (nr + 1) & (data->nr_pages - 1); - memcpy(data->data_pages[nr], buf + len, size); - } + return -ENOSPC; +} - /* - * generate a poll() wakeup for every page boundary crossed - */ - if (wakeup) { - atomic_xchg(&data->wakeup, POLL_IN); - __perf_counter_update_userpage(counter, data); +static void perf_output_copy(struct perf_output_handle *handle, + void *buf, unsigned int len) +{ + unsigned int pages_mask; + unsigned int offset; + unsigned int size; + void **pages; + + offset = handle->offset; + pages_mask = handle->data->nr_pages - 1; + pages = handle->data->data_pages; + + do { + unsigned int page_offset; + int nr; + + nr = (offset >> PAGE_SHIFT) & pages_mask; + page_offset = offset & (PAGE_SIZE - 1); + size = min_t(unsigned int, PAGE_SIZE - page_offset, len); + + memcpy(pages[nr] + page_offset, buf, size); + + len -= size; + buf += size; + offset += size; + } while (len); + + handle->offset = offset; +} + +static void perf_output_end(struct perf_output_handle *handle, int nmi) +{ + if (handle->wakeup) { + (void)atomic_xchg(&handle->data->wakeup, POLL_IN); + __perf_counter_update_userpage(handle->counter, handle->data); if (nmi) { - counter->wakeup_pending = 1; + handle->counter->wakeup_pending = 1; set_perf_counter_pending(); } else - wake_up(&counter->waitq); + wake_up(&handle->counter->waitq); } - ret = 0; -out: rcu_read_unlock(); +} + +static int perf_output_write(struct perf_counter *counter, int nmi, + void *buf, ssize_t size) +{ + struct perf_output_handle handle; + int ret; + ret = perf_output_begin(&handle, counter, size); + if (ret) + goto out; + + perf_output_copy(&handle, buf, size); + perf_output_end(&handle, nmi); + +out: return ret; } -- cgit v0.10.2 From 5c1481943250ab65fa5130e05ec479c93216e9f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:23 +0100 Subject: perf_counter: output objects Provide a {type,size} header for each output entry. This should provide extensible output, and the ability to mix multiple streams. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113316.831607932@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 48212c1..c256635 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -156,6 +156,16 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +struct perf_event_header { + __u32 type; + __u32 size; +}; + +enum perf_event_type { + PERF_EVENT_IP = 0, + PERF_EVENT_GROUP = 1, +}; + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -260,6 +270,7 @@ struct perf_counter { struct list_head list_entry; struct list_head event_entry; struct list_head sibling_list; + int nr_siblings; struct perf_counter *group_leader; const struct hw_perf_counter_ops *hw_ops; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0422fd9..d76e311 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -75,8 +75,10 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) */ if (counter->group_leader == counter) list_add_tail(&counter->list_entry, &ctx->counter_list); - else + else { list_add_tail(&counter->list_entry, &group_leader->sibling_list); + group_leader->nr_siblings++; + } list_add_rcu(&counter->event_entry, &ctx->event_list); } @@ -89,6 +91,9 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_del_init(&counter->list_entry); list_del_rcu(&counter->event_entry); + if (counter->group_leader != counter) + counter->group_leader->nr_siblings--; + /* * If this was a group counter with sibling counters then * upgrade the siblings to singleton counters by adding them @@ -381,9 +386,11 @@ static int is_software_only_group(struct perf_counter *leader) if (!is_software_counter(leader)) return 0; + list_for_each_entry(counter, &leader->sibling_list, list_entry) if (!is_software_counter(counter)) return 0; + return 1; } @@ -1480,6 +1487,9 @@ static void perf_output_copy(struct perf_output_handle *handle, handle->offset = offset; } +#define perf_output_put(handle, x) \ + perf_output_copy((handle), &(x), sizeof(x)) + static void perf_output_end(struct perf_output_handle *handle, int nmi) { if (handle->wakeup) { @@ -1514,34 +1524,53 @@ out: static void perf_output_simple(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - u64 entry; + struct { + struct perf_event_header header; + u64 ip; + } event; - entry = instruction_pointer(regs); + event.header.type = PERF_EVENT_IP; + event.header.size = sizeof(event); + event.ip = instruction_pointer(regs); - perf_output_write(counter, nmi, &entry, sizeof(entry)); + perf_output_write(counter, nmi, &event, sizeof(event)); } -struct group_entry { - u64 event; - u64 counter; -}; - static void perf_output_group(struct perf_counter *counter, int nmi) { + struct perf_output_handle handle; + struct perf_event_header header; struct perf_counter *leader, *sub; + unsigned int size; + struct { + u64 event; + u64 counter; + } entry; + int ret; + + size = sizeof(header) + counter->nr_siblings * sizeof(entry); + + ret = perf_output_begin(&handle, counter, size); + if (ret) + return; + + header.type = PERF_EVENT_GROUP; + header.size = size; + + perf_output_put(&handle, header); leader = counter->group_leader; list_for_each_entry(sub, &leader->sibling_list, list_entry) { - struct group_entry entry; - if (sub != counter) sub->hw_ops->read(sub); entry.event = sub->hw_event.config; entry.counter = atomic64_read(&sub->count); - perf_output_write(counter, nmi, &entry, sizeof(entry)); + perf_output_put(&handle, entry); } + + perf_output_end(&handle, nmi); } void perf_counter_output(struct perf_counter *counter, -- cgit v0.10.2 From 63e35b25d6b5c3136d22ef249dbbf96716aa08bf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:24 +0100 Subject: perf_counter: sanity check on the output API Ensure we never write more than we said we would. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113316.921433024@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d76e311..7669afe 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1422,6 +1422,7 @@ struct perf_output_handle { struct perf_counter *counter; struct perf_mmap_data *data; unsigned int offset; + unsigned int head; int wakeup; }; @@ -1447,6 +1448,7 @@ static int perf_output_begin(struct perf_output_handle *handle, handle->counter = counter; handle->data = data; handle->offset = offset; + handle->head = head; handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); return 0; @@ -1485,6 +1487,8 @@ static void perf_output_copy(struct perf_output_handle *handle, } while (len); handle->offset = offset; + + WARN_ON_ONCE(handle->offset > handle->head); } #define perf_output_put(handle, x) \ -- cgit v0.10.2 From ea5d20cf99db5d26d43b6d322d3ace17e08a6552 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:25 +0100 Subject: perf_counter: optionally provide the pid/tid of the sampled task Allow cpu wide counters to profile userspace by providing what process the sample belongs to. This raises the first issue with the output type, lots of these options: group, tid, callchain, etc.. are non-exclusive and could be combined, suggesting a bitfield. However, things like the mmap() data stream doesn't fit in that. How to split the type field... Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113317.013775235@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c256635..7fdbdf8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -127,8 +127,9 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ + include_tid : 1, /* include the tid */ - __reserved_1 : 55; + __reserved_1 : 54; __u32 extra_config_len; __u32 __reserved_4; @@ -164,6 +165,8 @@ struct perf_event_header { enum perf_event_type { PERF_EVENT_IP = 0, PERF_EVENT_GROUP = 1, + + __PERF_EVENT_TID = 0x100, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7669afe..f3e1b27 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1528,16 +1528,30 @@ out: static void perf_output_simple(struct perf_counter *counter, int nmi, struct pt_regs *regs) { + unsigned int size; struct { struct perf_event_header header; u64 ip; + u32 pid, tid; } event; event.header.type = PERF_EVENT_IP; - event.header.size = sizeof(event); event.ip = instruction_pointer(regs); - perf_output_write(counter, nmi, &event, sizeof(event)); + size = sizeof(event); + + if (counter->hw_event.include_tid) { + /* namespace issues */ + event.pid = current->group_leader->pid; + event.tid = current->pid; + + event.header.type |= __PERF_EVENT_TID; + } else + size -= sizeof(u64); + + event.header.size = size; + + perf_output_write(counter, nmi, &event, size); } static void perf_output_group(struct perf_counter *counter, int nmi) -- cgit v0.10.2 From 4c4ba21d2c3659e4c0421533939b58a8fd9f06c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:26 +0100 Subject: perf_counter: kerneltop: mmap_pages argument provide a knob to set the number of mmap data pages. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113317.104545398@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 7ebde7a..3e45bf6 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -178,6 +178,7 @@ static int nr_cpus = 0; static int nmi = 1; static int group = 0; static unsigned int page_size; +static unsigned int mmap_pages = 4; static char *vmlinux; @@ -326,6 +327,7 @@ static void display_help(void) " -x path --vmlinux= # the vmlinux binary, required for -s use\n" " -z --zero # zero counts after display\n" " -D --dump_symtab # dump symbol table to stderr on startup\n" + " -m pages --mmap_pages= # number of mmap data pages\n" ); exit(0); @@ -732,7 +734,9 @@ static int read_symbol(FILE *in, struct sym_entry *s) /* Tag events to be skipped. */ if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) s->skip = 1; - if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) + else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) + s->skip = 1; + else if (!strcmp("mwait_idle", s->sym)) s->skip = 1; if (filter_match == 1) { @@ -1042,9 +1046,10 @@ static void process_options(int argc, char *argv[]) {"symbol", required_argument, NULL, 's'}, {"stat", no_argument, NULL, 'S'}, {"zero", no_argument, NULL, 'z'}, + {"mmap_pages", required_argument, NULL, 'm'}, {NULL, 0, NULL, 0 } }; - int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:p:s:Sx:z", + int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z", long_options, &option_index); if (c == -1) break; @@ -1081,6 +1086,7 @@ static void process_options(int argc, char *argv[]) case 'S': run_perfstat = 1; break; case 'x': vmlinux = strdup(optarg); break; case 'z': zero = 1; break; + case 'm': mmap_pages = atoi(optarg); break; default: error = 1; break; } } @@ -1134,17 +1140,30 @@ repeat: return head; } +struct timeval last_read, this_read; + static void mmap_read(struct mmap_data *md) { unsigned int head = mmap_read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; + gettimeofday(&this_read, NULL); + if (head - old > md->mask) { - printf("ERROR: failed to keep up with mmap data\n"); - exit(-1); + struct timeval iv; + unsigned long msecs; + + timersub(&this_read, &last_read, &iv); + msecs = iv.tv_sec*1000 + iv.tv_usec/1000; + + fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs); + + old = head; } + last_read = this_read; + for (; old != head;) { __u64 *ptr = (__u64 *)&data[old & md->mask]; old += sizeof(__u64); @@ -1220,8 +1239,8 @@ int main(int argc, char *argv[]) mmap_array[i][counter].counter = counter; mmap_array[i][counter].prev = 0; - mmap_array[i][counter].mask = 2*page_size - 1; - mmap_array[i][counter].base = mmap(NULL, 3*page_size, + mmap_array[i][counter].mask = mmap_pages*page_size - 1; + mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, PROT_READ, MAP_SHARED, fd[i][counter], 0); if (mmap_array[i][counter].base == MAP_FAILED) { printf("kerneltop error: failed to mmap with %d (%s)\n", -- cgit v0.10.2 From 00f0ad73ac90e3fba8b4cbe4cf21b2fb9a56cb72 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:30:27 +0100 Subject: perf_counter: kerneltop: output event support Teach kerneltop about the new output ABI. XXX: anybody fancy integrating the PID/TID data into the output? Bump the mmap_data pages a little because we bloated the output and have to be more careful about overruns with structured data. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arjan van de Ven Cc: Wu Fengguang Orig-LKML-Reference: <20090325113317.192910290@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 3e45bf6..fda1438 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -134,6 +134,11 @@ #endif #define unlikely(x) __builtin_expect(!!(x), 0) +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) asmlinkage int sys_perf_counter_open( struct perf_counter_hw_event *hw_event_uptr __user, @@ -178,7 +183,7 @@ static int nr_cpus = 0; static int nmi = 1; static int group = 0; static unsigned int page_size; -static unsigned int mmap_pages = 4; +static unsigned int mmap_pages = 16; static char *vmlinux; @@ -1147,28 +1152,75 @@ static void mmap_read(struct mmap_data *md) unsigned int head = mmap_read_head(md); unsigned int old = md->prev; unsigned char *data = md->base + page_size; + int diff; gettimeofday(&this_read, NULL); - if (head - old > md->mask) { + /* + * If we're further behind than half the buffer, there's a chance + * the writer will bite our tail and screw up the events under us. + * + * If we somehow ended up ahead of the head, we got messed up. + * + * In either case, truncate and restart at head. + */ + diff = head - old; + if (diff > md->mask / 2 || diff < 0) { struct timeval iv; unsigned long msecs; timersub(&this_read, &last_read, &iv); msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - fprintf(stderr, "WARNING: failed to keep up with mmap data. Last read %lu msecs ago.\n", msecs); + fprintf(stderr, "WARNING: failed to keep up with mmap data." + " Last read %lu msecs ago.\n", msecs); + /* + * head points to a known good entry, start there. + */ old = head; } last_read = this_read; for (; old != head;) { - __u64 *ptr = (__u64 *)&data[old & md->mask]; - old += sizeof(__u64); + struct event_struct { + struct perf_event_header header; + __u64 ip; + __u32 pid, tid; + } *event = (struct event_struct *)&data[old & md->mask]; + struct event_struct event_copy; + + unsigned int size = event->header.size; + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((old & md->mask) + size != ((old + size) & md->mask)) { + unsigned int offset = old; + unsigned int len = sizeof(*event), cpy; + void *dst = &event_copy; + + do { + cpy = min(md->mask + 1 - (offset & md->mask), len); + memcpy(dst, &data[offset & md->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = &event_copy; + } - process_event(*ptr, md->counter); + old += size; + + switch (event->header.type) { + case PERF_EVENT_IP: + case PERF_EVENT_IP | __PERF_EVENT_TID: + process_event(event->ip, md->counter); + break; + } } md->prev = old; @@ -1214,6 +1266,7 @@ int main(int argc, char *argv[]) hw_event.irq_period = event_count[counter]; hw_event.record_type = PERF_RECORD_IRQ; hw_event.nmi = nmi; + hw_event.include_tid = 1; fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); if (fd[i][counter] < 0) { -- cgit v0.10.2 From 7730d8655880f41f2ea519aca2ca6a1413dfd2c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 25 Mar 2009 12:48:31 +0100 Subject: perf_counter: allow and require one-page mmap on counting counters A brainfart stopped single page mmap()s working. The rest of the code should be perfectly fine with not having any data pages. Reported-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <1237981712.7972.812.camel@twins> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f3e1b27..95e0257 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1369,7 +1369,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) vma_size = vma->vm_end - vma->vm_start; nr_pages = (vma_size / PAGE_SIZE) - 1; - if (nr_pages == 0 || !is_power_of_2(nr_pages)) + /* + * If we have data pages ensure they're a power-of-two number, so we + * can do bitmasks instead of modulo. + */ + if (nr_pages != 0 && !is_power_of_2(nr_pages)) return -EINVAL; if (vma_size != PAGE_SIZE * (1 + nr_pages)) -- cgit v0.10.2 From 53cfbf593758916aac41db728f029986a62f1254 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 25 Mar 2009 22:46:58 +1100 Subject: perf_counter: record time running and time enabled for each counter Impact: new functionality Currently, if there are more counters enabled than can fit on the CPU, the kernel will multiplex the counters on to the hardware using round-robin scheduling. That isn't too bad for sampling counters, but for counting counters it means that the value read from a counter represents some unknown fraction of the true count of events that occurred while the counter was enabled. This remedies the situation by keeping track of how long each counter is enabled for, and how long it is actually on the cpu and counting events. These times are recorded in nanoseconds using the task clock for per-task counters and the cpu clock for per-cpu counters. These values can be supplied to userspace on a read from the counter. Userspace requests that they be supplied after the counter value by setting the PERF_FORMAT_TOTAL_TIME_ENABLED and/or PERF_FORMAT_TOTAL_TIME_RUNNING bits in the hw_event.read_format field when creating the counter. (There is no way to change the read format after the counter is created, though it would be possible to add some way to do that.) Using this information it is possible for userspace to scale the count it reads from the counter to get an estimate of the true count: true_count_estimate = count * total_time_enabled / total_time_running This also lets userspace detect the situation where the counter never got to go on the cpu: total_time_running == 0. This functionality has been requested by the PAPI developers, and will be generally needed for interpreting the count values from counting counters correctly. In the implementation, this keeps 5 time values (in nanoseconds) for each counter: total_time_enabled and total_time_running are used when the counter is in state OFF or ERROR and for reporting back to userspace. When the counter is in state INACTIVE or ACTIVE, it is the tstamp_enabled, tstamp_running and tstamp_stopped values that are relevant, and total_time_enabled and total_time_running are determined from them. (tstamp_stopped is only used in INACTIVE state.) The reason for doing it like this is that it means that only counters being enabled or disabled at sched-in and sched-out time need to be updated. There are no new loops that iterate over all counters to update total_time_enabled or total_time_running. This also keeps separate child_total_time_running and child_total_time_enabled fields that get added in when reporting the totals to userspace. They are separate fields so that they can be atomic. We don't want to use atomics for total_time_running, total_time_enabled etc., because then we would have to use atomic sequences to update them, which are slower than regular arithmetic and memory accesses. It is possible to measure total_time_running by adding a task_clock counter to each group of counters, and total_time_enabled can be measured approximately with a top-level task_clock counter (though inaccuracies will creep in if you need to disable and enable groups since it is not possible in general to disable/enable the top-level task_clock counter simultaneously with another group). However, that adds extra overhead - I measured around 15% increase in the context switch latency reported by lat_ctx (from lmbench) when a task_clock counter was added to each of 2 groups, and around 25% increase when a task_clock counter was added to each of 4 groups. (In both cases a top-level task-clock counter was also added.) In contrast, the code added in this commit gives better information with no overhead that I could measure (in fact in some cases I measured lower times with this code, but the differences were all less than one standard deviation). [ v2: address review comments by Andrew Morton. ] Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Andrew Morton Orig-LKML-Reference: <18890.6578.728637.139402@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d48596a..df007fe 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -455,6 +455,8 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) { counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; + counter->tstamp_running += counter->ctx->time_now - + counter->tstamp_stopped; if (is_software_counter(counter)) counter->hw_ops->enable(counter); } diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7fdbdf8..6bf67ce 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -103,6 +103,16 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) /* + * Bits that can be set in hw_event.read_format to request that + * reads on the counter should return the indicated quantities, + * in increasing order of bit value, after the counter value. + */ +enum perf_counter_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1, + PERF_FORMAT_TOTAL_TIME_RUNNING = 2, +}; + +/* * Hardware event to monitor via a performance monitoring counter: */ struct perf_counter_hw_event { @@ -281,6 +291,32 @@ struct perf_counter { enum perf_counter_active_state prev_state; atomic64_t count; + /* + * These are the total time in nanoseconds that the counter + * has been enabled (i.e. eligible to run, and the task has + * been scheduled in, if this is a per-task counter) + * and running (scheduled onto the CPU), respectively. + * + * They are computed from tstamp_enabled, tstamp_running and + * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. + */ + u64 total_time_enabled; + u64 total_time_running; + + /* + * These are timestamps used for computing total_time_enabled + * and total_time_running when the counter is in INACTIVE or + * ACTIVE state, measured in nanoseconds from an arbitrary point + * in time. + * tstamp_enabled: the notional time when the counter was enabled + * tstamp_running: the notional time when the counter was scheduled on + * tstamp_stopped: in INACTIVE state, the notional time when the + * counter was scheduled off. + */ + u64 tstamp_enabled; + u64 tstamp_running; + u64 tstamp_stopped; + struct perf_counter_hw_event hw_event; struct hw_perf_counter hw; @@ -292,6 +328,13 @@ struct perf_counter { struct list_head child_list; /* + * These accumulate total time (in nanoseconds) that children + * counters have been enabled and running, respectively. + */ + atomic64_t child_total_time_enabled; + atomic64_t child_total_time_running; + + /* * Protect attach/detach and child_list: */ struct mutex mutex; @@ -339,6 +382,16 @@ struct perf_counter_context { int nr_active; int is_active; struct task_struct *task; + + /* + * time_now is the current time in nanoseconds since an arbitrary + * point in the past. For per-task counters, this is based on the + * task clock, and for per-cpu counters it is based on the cpu clock. + * time_lost is an offset from the task/cpu clock, used to make it + * appear that time only passes while the context is scheduled in. + */ + u64 time_now; + u64 time_lost; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 95e0257..3b862a7 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -116,6 +116,7 @@ counter_sched_out(struct perf_counter *counter, return; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_stopped = ctx->time_now; counter->hw_ops->disable(counter); counter->oncpu = -1; @@ -252,6 +253,60 @@ retry: } /* + * Get the current time for this context. + * If this is a task context, we use the task's task clock, + * or for a per-cpu context, we use the cpu clock. + */ +static u64 get_context_time(struct perf_counter_context *ctx, int update) +{ + struct task_struct *curr = ctx->task; + + if (!curr) + return cpu_clock(smp_processor_id()); + + return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime; +} + +/* + * Update the record of the current time in a context. + */ +static void update_context_time(struct perf_counter_context *ctx, int update) +{ + ctx->time_now = get_context_time(ctx, update) - ctx->time_lost; +} + +/* + * Update the total_time_enabled and total_time_running fields for a counter. + */ +static void update_counter_times(struct perf_counter *counter) +{ + struct perf_counter_context *ctx = counter->ctx; + u64 run_end; + + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { + counter->total_time_enabled = ctx->time_now - + counter->tstamp_enabled; + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + run_end = counter->tstamp_stopped; + else + run_end = ctx->time_now; + counter->total_time_running = run_end - counter->tstamp_running; + } +} + +/* + * Update total_time_enabled and total_time_running for all counters in a group. + */ +static void update_group_times(struct perf_counter *leader) +{ + struct perf_counter *counter; + + update_counter_times(leader); + list_for_each_entry(counter, &leader->sibling_list, list_entry) + update_counter_times(counter); +} + +/* * Cross CPU call to disable a performance counter */ static void __perf_counter_disable(void *info) @@ -276,6 +331,8 @@ static void __perf_counter_disable(void *info) * If it is in error state, leave it in error state. */ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { + update_context_time(ctx, 1); + update_counter_times(counter); if (counter == counter->group_leader) group_sched_out(counter, cpuctx, ctx); else @@ -320,8 +377,10 @@ static void perf_counter_disable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) + if (counter->state == PERF_COUNTER_STATE_INACTIVE) { + update_counter_times(counter); counter->state = PERF_COUNTER_STATE_OFF; + } spin_unlock_irq(&ctx->lock); } @@ -366,6 +425,8 @@ counter_sched_in(struct perf_counter *counter, return -EAGAIN; } + counter->tstamp_running += ctx->time_now - counter->tstamp_stopped; + if (!is_software_counter(counter)) cpuctx->active_oncpu++; ctx->nr_active++; @@ -425,6 +486,17 @@ static int group_can_go_on(struct perf_counter *counter, return can_add_hw; } +static void add_counter_to_ctx(struct perf_counter *counter, + struct perf_counter_context *ctx) +{ + list_add_counter(counter, ctx); + ctx->nr_counters++; + counter->prev_state = PERF_COUNTER_STATE_OFF; + counter->tstamp_enabled = ctx->time_now; + counter->tstamp_running = ctx->time_now; + counter->tstamp_stopped = ctx->time_now; +} + /* * Cross CPU call to install and enable a performance counter */ @@ -449,6 +521,7 @@ static void __perf_install_in_context(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); + update_context_time(ctx, 1); /* * Protect the list operation against NMI by disabling the @@ -456,9 +529,7 @@ static void __perf_install_in_context(void *info) */ perf_flags = hw_perf_save_disable(); - list_add_counter(counter, ctx); - ctx->nr_counters++; - counter->prev_state = PERF_COUNTER_STATE_OFF; + add_counter_to_ctx(counter, ctx); /* * Don't put the counter on if it is disabled or if @@ -486,8 +557,10 @@ static void __perf_install_in_context(void *info) */ if (leader != counter) group_sched_out(leader, cpuctx, ctx); - if (leader->hw_event.pinned) + if (leader->hw_event.pinned) { + update_group_times(leader); leader->state = PERF_COUNTER_STATE_ERROR; + } } if (!err && !ctx->task && cpuctx->max_pertask) @@ -548,10 +621,8 @@ retry: * can add the counter safely, if it the call above did not * succeed. */ - if (list_empty(&counter->list_entry)) { - list_add_counter(counter, ctx); - ctx->nr_counters++; - } + if (list_empty(&counter->list_entry)) + add_counter_to_ctx(counter, ctx); spin_unlock_irq(&ctx->lock); } @@ -576,11 +647,13 @@ static void __perf_counter_enable(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); + update_context_time(ctx, 1); counter->prev_state = counter->state; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled; /* * If the counter is in a group and isn't the group leader, @@ -602,8 +675,10 @@ static void __perf_counter_enable(void *info) */ if (leader != counter) group_sched_out(leader, cpuctx, ctx); - if (leader->hw_event.pinned) + if (leader->hw_event.pinned) { + update_group_times(leader); leader->state = PERF_COUNTER_STATE_ERROR; + } } unlock: @@ -659,8 +734,11 @@ static void perf_counter_enable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. */ - if (counter->state == PERF_COUNTER_STATE_OFF) + if (counter->state == PERF_COUNTER_STATE_OFF) { counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time_now - + counter->total_time_enabled; + } out: spin_unlock_irq(&ctx->lock); } @@ -693,6 +771,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, ctx->is_active = 0; if (likely(!ctx->nr_counters)) goto out; + update_context_time(ctx, 0); flags = hw_perf_save_disable(); if (ctx->nr_active) { @@ -797,6 +876,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, if (likely(!ctx->nr_counters)) goto out; + /* + * Add any time since the last sched_out to the lost time + * so it doesn't get included in the total_time_enabled and + * total_time_running measures for counters in the context. + */ + ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now; + flags = hw_perf_save_disable(); /* @@ -817,8 +903,10 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, * If this pinned group hasn't been scheduled, * put it in error state. */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) + if (counter->state == PERF_COUNTER_STATE_INACTIVE) { + update_group_times(counter); counter->state = PERF_COUNTER_STATE_ERROR; + } } list_for_each_entry(counter, &ctx->counter_list, list_entry) { @@ -902,8 +990,10 @@ int perf_counter_task_disable(void) perf_flags = hw_perf_save_disable(); list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_ERROR) + if (counter->state != PERF_COUNTER_STATE_ERROR) { + update_group_times(counter); counter->state = PERF_COUNTER_STATE_OFF; + } } hw_perf_restore(perf_flags); @@ -946,6 +1036,8 @@ int perf_counter_task_enable(void) if (counter->state > PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time_now - + counter->total_time_enabled; counter->hw_event.disabled = 0; } hw_perf_restore(perf_flags); @@ -1009,10 +1101,14 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) static void __read(void *info) { struct perf_counter *counter = info; + struct perf_counter_context *ctx = counter->ctx; unsigned long flags; curr_rq_lock_irq_save(&flags); + if (ctx->is_active) + update_context_time(ctx, 1); counter->hw_ops->read(counter); + update_counter_times(counter); curr_rq_unlock_irq_restore(&flags); } @@ -1025,6 +1121,8 @@ static u64 perf_counter_read(struct perf_counter *counter) if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, __read, counter, 1); + } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { + update_counter_times(counter); } return atomic64_read(&counter->count); @@ -1137,10 +1235,8 @@ static int perf_release(struct inode *inode, struct file *file) static ssize_t perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { - u64 cntval; - - if (count < sizeof(cntval)) - return -EINVAL; + u64 values[3]; + int n; /* * Return end-of-file for a read on a counter that is in @@ -1151,10 +1247,24 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) return 0; mutex_lock(&counter->mutex); - cntval = perf_counter_read(counter); + values[0] = perf_counter_read(counter); + n = 1; + if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); mutex_unlock(&counter->mutex); - return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval); + if (count < n * sizeof(u64)) + return -EINVAL; + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; } static ssize_t @@ -2290,8 +2400,7 @@ inherit_counter(struct perf_counter *parent_counter, * Link it up in the child's context: */ child_counter->task = child; - list_add_counter(child_counter, child_ctx); - child_ctx->nr_counters++; + add_counter_to_ctx(child_counter, child_ctx); child_counter->parent = parent_counter; /* @@ -2361,6 +2470,10 @@ static void sync_child_counter(struct perf_counter *child_counter, * Add back the child's count to the parent's count: */ atomic64_add(child_val, &parent_counter->count); + atomic64_add(child_counter->total_time_enabled, + &parent_counter->child_total_time_enabled); + atomic64_add(child_counter->total_time_running, + &parent_counter->child_total_time_running); /* * Remove this counter from the parent's list @@ -2395,6 +2508,7 @@ __perf_counter_exit_task(struct task_struct *child, if (child != current) { wait_task_inactive(child, 0); list_del_init(&child_counter->list_entry); + update_counter_times(child_counter); } else { struct perf_cpu_context *cpuctx; unsigned long flags; @@ -2412,6 +2526,7 @@ __perf_counter_exit_task(struct task_struct *child, cpuctx = &__get_cpu_var(perf_cpu_context); group_sched_out(child_counter, cpuctx, child_ctx); + update_counter_times(child_counter); list_del_init(&child_counter->list_entry); -- cgit v0.10.2 From 925d519ab82b6dd7aca9420d809ee83819c08db2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:02 +0200 Subject: perf_counter: unify and fix delayed counter wakeup While going over the wakeup code I noticed delayed wakeups only work for hardware counters but basically all software counters rely on them. This patch unifies and generalizes the delayed wakeup to fix this issue. Since we're dealing with NMI context bits here, use a cmpxchg() based single link list implementation to track counters that have pending wakeups. [ This should really be generic code for delayed wakeups, but since we cannot use cmpxchg()/xchg() in generic code, I've let it live in the perf_counter code. -- Eric Dumazet could use it to aggregate the network wakeups. ] Furthermore, the x86 method of using TIF flags was flawed in that its quite possible to end up setting the bit on the idle task, loosing the wakeup. The powerpc method uses per-cpu storage and does appear to be sufficient. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.153932974@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index cb32d57..20a44d0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -132,7 +132,7 @@ static inline int irqs_disabled_flags(unsigned long flags) struct irq_chip; #ifdef CONFIG_PERF_COUNTERS -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { unsigned long x; @@ -160,7 +160,7 @@ extern void perf_counter_do_pending(void); #else -static inline unsigned long get_perf_counter_pending(void) +static inline unsigned long test_perf_counter_pending(void) { return 0; } diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 469e963..2cd471f 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -135,7 +135,7 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (get_perf_counter_pending()) { + if (test_perf_counter_pending()) { clear_perf_counter_pending(); perf_counter_do_pending(); } diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index df007fe..cde720f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -650,24 +650,6 @@ hw_perf_counter_init(struct perf_counter *counter) } /* - * Handle wakeups. - */ -void perf_counter_do_pending(void) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter && counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } -} - -/* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled * here so there is no possibility of being interrupted. @@ -720,7 +702,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); struct perf_counter *counter; long val; - int need_wakeup = 0, found = 0; + int found = 0; for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; @@ -761,7 +743,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) * immediately; otherwise we'll have do the wakeup when interrupts * get soft-enabled. */ - if (get_perf_counter_pending() && regs->softe) { + if (test_perf_counter_pending() && regs->softe) { irq_enter(); clear_perf_counter_pending(); perf_counter_do_pending(); diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 1662043..e2b0e66 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,8 +84,9 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() \ - set_tsk_thread_flag(current, TIF_PERF_COUNTERS); +#define set_perf_counter_pending() do { } while (0) +#define clear_perf_counter_pending() do { } while (0) +#define test_perf_counter_pending() (0) #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3ffd5d2..8820a73 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,7 +83,6 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -107,7 +106,6 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -141,7 +139,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3f95b0c..7aab177 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -227,7 +227,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) */ hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); } - counter->wakeup_pending = 0; return 0; } @@ -773,34 +772,6 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_exit(); } -/* - * This handler is triggered by NMI contexts: - */ -void perf_counter_notify(struct pt_regs *regs) -{ - struct cpu_hw_counters *cpuc; - unsigned long flags; - int bit, cpu; - - local_irq_save(flags); - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - if (!counter) - continue; - - if (counter->wakeup_pending) { - counter->wakeup_pending = 0; - wake_up(&counter->waitq); - } - } - - local_irq_restore(flags); -} - void perf_counters_lapic_init(int nmi) { u32 apic_val; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 611615a..0a813b1 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -6,7 +6,6 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ -#include #include #include #include @@ -872,11 +871,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) tracehook_notify_resume(regs); } - if (thread_info_flags & _TIF_PERF_COUNTERS) { - clear_thread_flag(TIF_PERF_COUNTERS); - perf_counter_notify(regs); - } - #ifdef CONFIG_X86_32 clear_thread_flag(TIF_IRET); #endif /* CONFIG_X86_32 */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6bf67ce..0d83322 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -275,6 +275,10 @@ struct perf_mmap_data { void *data_pages[0]; }; +struct perf_wakeup_entry { + struct perf_wakeup_entry *next; +}; + /** * struct perf_counter - performance counter kernel representation: */ @@ -350,7 +354,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; /* optional: for NMIs */ - int wakeup_pending; + struct perf_wakeup_entry wakeup; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; @@ -427,7 +431,7 @@ extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); extern void perf_counter_task_tick(struct task_struct *task, int cpu); extern void perf_counter_init_task(struct task_struct *child); extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_notify(struct pt_regs *regs); +extern void perf_counter_do_pending(void); extern void perf_counter_print_debug(void); extern void perf_counter_unthrottle(void); extern u64 hw_perf_save_disable(void); @@ -461,7 +465,7 @@ static inline void perf_counter_task_tick(struct task_struct *task, int cpu) { } static inline void perf_counter_init_task(struct task_struct *child) { } static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_notify(struct pt_regs *regs) { } +static inline void perf_counter_do_pending(void) { } static inline void perf_counter_print_debug(void) { } static inline void perf_counter_unthrottle(void) { } static inline void hw_perf_restore(u64 ctrl) { } @@ -469,8 +473,9 @@ static inline u64 hw_perf_save_disable(void) { return 0; } static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } -static inline void perf_swcounter_event(u32 event, u64 nr, - int nmi, struct pt_regs *regs) { } +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3b862a7..f70ff80 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1197,8 +1197,12 @@ static void free_counter_rcu(struct rcu_head *head) kfree(counter); } +static void perf_pending_sync(struct perf_counter *counter); + static void free_counter(struct perf_counter *counter) { + perf_pending_sync(counter); + if (counter->destroy) counter->destroy(counter); @@ -1529,6 +1533,118 @@ static const struct file_operations perf_fops = { }; /* + * Perf counter wakeup + * + * If there's data, ensure we set the poll() state and publish everything + * to user-space before waking everybody up. + */ + +void perf_counter_wakeup(struct perf_counter *counter) +{ + struct perf_mmap_data *data; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (data) { + (void)atomic_xchg(&data->wakeup, POLL_IN); + __perf_counter_update_userpage(counter, data); + } + rcu_read_unlock(); + + wake_up_all(&counter->waitq); +} + +/* + * Pending wakeups + * + * Handle the case where we need to wakeup up from NMI (or rq->lock) context. + * + * The NMI bit means we cannot possibly take locks. Therefore, maintain a + * single linked list and use cmpxchg() to add entries lockless. + */ + +#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL) + +static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = { + PENDING_TAIL, +}; + +static void perf_pending_queue(struct perf_counter *counter) +{ + struct perf_wakeup_entry **head; + struct perf_wakeup_entry *prev, *next; + + if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL) + return; + + head = &get_cpu_var(perf_wakeup_head); + + do { + prev = counter->wakeup.next = *head; + next = &counter->wakeup; + } while (cmpxchg(head, prev, next) != prev); + + set_perf_counter_pending(); + + put_cpu_var(perf_wakeup_head); +} + +static int __perf_pending_run(void) +{ + struct perf_wakeup_entry *list; + int nr = 0; + + list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL); + while (list != PENDING_TAIL) { + struct perf_counter *counter = container_of(list, + struct perf_counter, wakeup); + + list = list->next; + + counter->wakeup.next = NULL; + /* + * Ensure we observe the unqueue before we issue the wakeup, + * so that we won't be waiting forever. + * -- see perf_not_pending(). + */ + smp_wmb(); + + perf_counter_wakeup(counter); + nr++; + } + + return nr; +} + +static inline int perf_not_pending(struct perf_counter *counter) +{ + /* + * If we flush on whatever cpu we run, there is a chance we don't + * need to wait. + */ + get_cpu(); + __perf_pending_run(); + put_cpu(); + + /* + * Ensure we see the proper queue state before going to sleep + * so that we do not miss the wakeup. -- see perf_pending_handle() + */ + smp_rmb(); + return counter->wakeup.next == NULL; +} + +static void perf_pending_sync(struct perf_counter *counter) +{ + wait_event(counter->waitq, perf_not_pending(counter)); +} + +void perf_counter_do_pending(void) +{ + __perf_pending_run(); +} + +/* * Output */ @@ -1611,13 +1727,10 @@ static void perf_output_copy(struct perf_output_handle *handle, static void perf_output_end(struct perf_output_handle *handle, int nmi) { if (handle->wakeup) { - (void)atomic_xchg(&handle->data->wakeup, POLL_IN); - __perf_counter_update_userpage(handle->counter, handle->data); - if (nmi) { - handle->counter->wakeup_pending = 1; - set_perf_counter_pending(); - } else - wake_up(&handle->counter->waitq); + if (nmi) + perf_pending_queue(handle->counter); + else + perf_counter_wakeup(handle->counter); } rcu_read_unlock(); } @@ -2211,7 +2324,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->cpu = cpu; counter->hw_event = *hw_event; - counter->wakeup_pending = 0; counter->group_leader = group_leader; counter->hw_ops = NULL; counter->ctx = ctx; diff --git a/kernel/timer.c b/kernel/timer.c index b455556..672ca25 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1167,6 +1168,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __get_cpu_var(tvec_bases); + perf_counter_do_pending(); + hrtimer_run_pending(); if (time_after_eq(jiffies, base->timer_jiffies)) -- cgit v0.10.2 From 38ff667b321b00f5e6830e93fb4ab11a653a2920 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:03 +0200 Subject: perf_counter: fix update_userpage() It just occured to me it is possible to have multiple contending updates of the userpage (mmap information vs overflow vs counter). This would break the seqlock logic. It appear the arch code uses this from NMI context, so we cannot possibly serialize its use, therefore separate the data_head update from it and let it return to its original use. The arch code needs to make sure there are no contending callers by disabling the counter before using it -- powerpc appears to do this nicely. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.241410660@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0d83322..8ac1885 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -160,10 +160,45 @@ struct perf_counter_hw_event { struct perf_counter_mmap_page { __u32 version; /* version number of this structure */ __u32 compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw counters in user-space. + * + * The index and offset should be read atomically using the seqlock: + * + * __u32 seq, index; + * __s64 offset; + * + * again: + * rmb(); + * seq = pc->lock; + * + * if (unlikely(seq & 1)) { + * cpu_relax(); + * goto again; + * } + * + * index = pc->index; + * offset = pc->offset; + * + * rmb(); + * if (pc->lock != seq) + * goto again; + * + * After this, index contains architecture specific counter index + 1, + * so that 0 means unavailable, offset contains the value to be added + * to the result of the raw timer read to obtain this counter's value. + */ __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + /* + * Control data for the mmap() data buffer. + * + * User-space reading this value should issue an rmb(), on SMP capable + * platforms, after reading this value -- see perf_counter_wakeup(). + */ __u32 data_head; /* head in the data section */ }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f70ff80..c95e923 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1316,10 +1316,22 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } -static void __perf_counter_update_userpage(struct perf_counter *counter, - struct perf_mmap_data *data) +/* + * Callers need to ensure there can be no nesting of this function, otherwise + * the seqlock logic goes bad. We can not serialize this because the arch + * code calls this from NMI context. + */ +void perf_counter_update_userpage(struct perf_counter *counter) { - struct perf_counter_mmap_page *userpg = data->user_page; + struct perf_mmap_data *data; + struct perf_counter_mmap_page *userpg; + + rcu_read_lock(); + data = rcu_dereference(counter->data); + if (!data) + goto unlock; + + userpg = data->user_page; /* * Disable preemption so as to not let the corresponding user-space @@ -1333,20 +1345,10 @@ static void __perf_counter_update_userpage(struct perf_counter *counter, if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); - userpg->data_head = atomic_read(&data->head); smp_wmb(); ++userpg->lock; preempt_enable(); -} - -void perf_counter_update_userpage(struct perf_counter *counter) -{ - struct perf_mmap_data *data; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (data) - __perf_counter_update_userpage(counter, data); +unlock: rcu_read_unlock(); } @@ -1547,7 +1549,13 @@ void perf_counter_wakeup(struct perf_counter *counter) data = rcu_dereference(counter->data); if (data) { (void)atomic_xchg(&data->wakeup, POLL_IN); - __perf_counter_update_userpage(counter, data); + /* + * Ensure all data writes are issued before updating the + * user-space data head information. The matching rmb() + * will be in userspace after reading this value. + */ + smp_wmb(); + data->user_page->data_head = atomic_read(&data->head); } rcu_read_unlock(); -- cgit v0.10.2 From 195564390210977954fe4ef45b39cdee34f41b59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:04 +0200 Subject: perf_counter: kerneltop: simplify data_head read Now that the kernel side changed, match up again. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.327144324@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index fda1438..2779c57 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -1125,22 +1125,10 @@ struct mmap_data { static unsigned int mmap_read_head(struct mmap_data *md) { struct perf_counter_mmap_page *pc = md->base; - unsigned int seq, head; - -repeat: - rmb(); - seq = pc->lock; - - if (unlikely(seq & 1)) { - cpu_relax(); - goto repeat; - } + int head; head = pc->data_head; - rmb(); - if (pc->lock != seq) - goto repeat; return head; } -- cgit v0.10.2 From 0a4a93919bdc5cee48fe4367591e8e0449c1086c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:05 +0200 Subject: perf_counter: executable mmap() information Currently the profiling information returns userspace IPs but no way to correlate them to userspace code. Userspace could look into /proc/$pid/maps but that might not be current or even present anymore at the time of analyzing the IPs. Therefore provide means to track the mmap information and provide it in the output stream. XXX: only covers mmap()/munmap(), mremap() and mprotect() are missing. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Cc: Andrew Morton Orig-LKML-Reference: <20090330171023.417259499@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8ac1885..037a811 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -137,9 +137,11 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ + include_tid : 1, /* include the tid */ + mmap : 1, /* include mmap data */ + munmap : 1, /* include munmap data */ - __reserved_1 : 54; + __reserved_1 : 52; __u32 extra_config_len; __u32 __reserved_4; @@ -211,6 +213,9 @@ enum perf_event_type { PERF_EVENT_IP = 0, PERF_EVENT_GROUP = 1, + PERF_EVENT_MMAP = 2, + PERF_EVENT_MUNMAP = 3, + __PERF_EVENT_TID = 0x100, }; @@ -491,6 +496,12 @@ static inline int is_software_counter(struct perf_counter *counter) extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); +extern void perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file); + +extern void perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } @@ -511,6 +522,15 @@ static inline int perf_counter_task_enable(void) { return -EINVAL; } static inline void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } + +static inline void +perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) { } + +static inline void +perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) { } + #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c95e923..f35e89e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -1844,6 +1845,150 @@ void perf_counter_output(struct perf_counter *counter, } /* + * mmap tracking + */ + +struct perf_mmap_event { + struct file *file; + char *file_name; + int file_size; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 start; + u64 len; + u64 pgoff; + } event; +}; + +static void perf_counter_mmap_output(struct perf_counter *counter, + struct perf_mmap_event *mmap_event) +{ + struct perf_output_handle handle; + int size = mmap_event->event.header.size; + int ret = perf_output_begin(&handle, counter, size); + + if (ret) + return; + + perf_output_put(&handle, mmap_event->event); + perf_output_copy(&handle, mmap_event->file_name, + mmap_event->file_size); + perf_output_end(&handle, 0); +} + +static int perf_counter_mmap_match(struct perf_counter *counter, + struct perf_mmap_event *mmap_event) +{ + if (counter->hw_event.mmap && + mmap_event->event.header.type == PERF_EVENT_MMAP) + return 1; + + if (counter->hw_event.munmap && + mmap_event->event.header.type == PERF_EVENT_MUNMAP) + return 1; + + return 0; +} + +static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, + struct perf_mmap_event *mmap_event) +{ + struct perf_counter *counter; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { + if (perf_counter_mmap_match(counter, mmap_event)) + perf_counter_mmap_output(counter, mmap_event); + } + rcu_read_unlock(); +} + +static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) +{ + struct perf_cpu_context *cpuctx; + struct file *file = mmap_event->file; + unsigned int size; + char tmp[16]; + char *buf = NULL; + char *name; + + if (file) { + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!buf) { + name = strncpy(tmp, "//enomem", sizeof(tmp)); + goto got_name; + } + name = dentry_path(file->f_dentry, buf, PATH_MAX); + if (IS_ERR(name)) { + name = strncpy(tmp, "//toolong", sizeof(tmp)); + goto got_name; + } + } else { + name = strncpy(tmp, "//anon", sizeof(tmp)); + goto got_name; + } + +got_name: + size = ALIGN(strlen(name), sizeof(u64)); + + mmap_event->file_name = name; + mmap_event->file_size = size; + + mmap_event->event.header.size = sizeof(mmap_event->event) + size; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); + put_cpu_var(perf_cpu_context); + + perf_counter_mmap_ctx(¤t->perf_counter_ctx, mmap_event); + + kfree(buf); +} + +void perf_counter_mmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) +{ + struct perf_mmap_event mmap_event = { + .file = file, + .event = { + .header = { .type = PERF_EVENT_MMAP, }, + .pid = current->group_leader->pid, + .tid = current->pid, + .start = addr, + .len = len, + .pgoff = pgoff, + }, + }; + + perf_counter_mmap_event(&mmap_event); +} + +void perf_counter_munmap(unsigned long addr, unsigned long len, + unsigned long pgoff, struct file *file) +{ + struct perf_mmap_event mmap_event = { + .file = file, + .event = { + .header = { .type = PERF_EVENT_MUNMAP, }, + .pid = current->group_leader->pid, + .tid = current->pid, + .start = addr, + .len = len, + .pgoff = pgoff, + }, + }; + + perf_counter_mmap_event(&mmap_event); +} + +/* * Generic software counter infrastructure */ diff --git a/mm/mmap.c b/mm/mmap.c index 4a38411..1df63f6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -1223,6 +1224,9 @@ munmap_back: if (correct_wcount) atomic_inc(&inode->i_writecount); out: + if (vm_flags & VM_EXEC) + perf_counter_mmap(addr, len, pgoff, file); + mm->total_vm += len >> PAGE_SHIFT; vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { @@ -1756,6 +1760,12 @@ static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) do { long nrpages = vma_pages(vma); + if (vma->vm_flags & VM_EXEC) { + perf_counter_munmap(vma->vm_start, + nrpages << PAGE_SHIFT, + vma->vm_pgoff, vma->vm_file); + } + mm->total_vm -= nrpages; vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); vma = remove_vma(vma); -- cgit v0.10.2 From 3c1ba6fafecaed295017881f8863a18602f32c1d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:06 +0200 Subject: perf_counter: kerneltop: parse the mmap data stream frob the kerneltop code to print the mmap data in the stream Better use would be collecting the IPs per PID and mapping them onto the provided userspace code.. TODO Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.501902515@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 2779c57..995111d 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -184,6 +184,8 @@ static int nmi = 1; static int group = 0; static unsigned int page_size; static unsigned int mmap_pages = 16; +static int use_mmap = 0; +static int use_munmap = 0; static char *vmlinux; @@ -333,6 +335,8 @@ static void display_help(void) " -z --zero # zero counts after display\n" " -D --dump_symtab # dump symbol table to stderr on startup\n" " -m pages --mmap_pages= # number of mmap data pages\n" + " -M --mmap_info # print mmap info stream\n" + " -U --munmap_info # print munmap info stream\n" ); exit(0); @@ -1052,9 +1056,11 @@ static void process_options(int argc, char *argv[]) {"stat", no_argument, NULL, 'S'}, {"zero", no_argument, NULL, 'z'}, {"mmap_pages", required_argument, NULL, 'm'}, + {"mmap_info", no_argument, NULL, 'M'}, + {"munmap_info", no_argument, NULL, 'U'}, {NULL, 0, NULL, 0 } }; - int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z", + int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU", long_options, &option_index); if (c == -1) break; @@ -1092,6 +1098,8 @@ static void process_options(int argc, char *argv[]) case 'x': vmlinux = strdup(optarg); break; case 'z': zero = 1; break; case 'm': mmap_pages = atoi(optarg); break; + case 'M': use_mmap = 1; break; + case 'U': use_munmap = 1; break; default: error = 1; break; } } @@ -1172,12 +1180,29 @@ static void mmap_read(struct mmap_data *md) last_read = this_read; for (; old != head;) { - struct event_struct { + struct ip_event { struct perf_event_header header; __u64 ip; __u32 pid, tid; - } *event = (struct event_struct *)&data[old & md->mask]; - struct event_struct event_copy; + }; + struct mmap_event { + struct perf_event_header header; + __u32 pid, tid; + __u64 start; + __u64 len; + __u64 pgoff; + char filename[PATH_MAX]; + }; + + typedef union event_union { + struct perf_event_header header; + struct ip_event ip; + struct mmap_event mmap; + } event_t; + + event_t *event = (event_t *)&data[old & md->mask]; + + event_t event_copy; unsigned int size = event->header.size; @@ -1187,7 +1212,7 @@ static void mmap_read(struct mmap_data *md) */ if ((old & md->mask) + size != ((old + size) & md->mask)) { unsigned int offset = old; - unsigned int len = sizeof(*event), cpy; + unsigned int len = min(sizeof(*event), size), cpy; void *dst = &event_copy; do { @@ -1206,7 +1231,18 @@ static void mmap_read(struct mmap_data *md) switch (event->header.type) { case PERF_EVENT_IP: case PERF_EVENT_IP | __PERF_EVENT_TID: - process_event(event->ip, md->counter); + process_event(event->ip.ip, md->counter); + break; + + case PERF_EVENT_MMAP: + case PERF_EVENT_MUNMAP: + printf("%s: %Lu %Lu %Lu %s\n", + event->header.type == PERF_EVENT_MMAP + ? "mmap" : "munmap", + event->mmap.start, + event->mmap.len, + event->mmap.pgoff, + event->mmap.filename); break; } } @@ -1255,6 +1291,8 @@ int main(int argc, char *argv[]) hw_event.record_type = PERF_RECORD_IRQ; hw_event.nmi = nmi; hw_event.include_tid = 1; + hw_event.mmap = use_mmap; + hw_event.munmap = use_munmap; fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); if (fd[i][counter] < 0) { -- cgit v0.10.2 From 7595d63b3a9ce65d14c4fbd0e7de448a343d7215 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Mar 2009 19:07:07 +0200 Subject: perf_counter: powerpc: only reserve PMU hardware when we need it Impact: cooperate with oprofile At present, on PowerPC, if you have perf_counters compiled in, oprofile doesn't work. There is code to allow the PMU to be shared between competing subsystems, such as perf_counters and oprofile, but currently the perf_counter subsystem reserves the PMU for itself at boot time, and never releases it. This makes perf_counter play nicely with oprofile. Now we keep a count of how many perf_counter instances are counting hardware events, and reserve the PMU when that count becomes non-zero, and release the PMU when that count becomes zero. This means that it is possible to have perf_counters compiled in and still use oprofile, as long as there are no hardware perf_counters active. This also means that if oprofile is active, sys_perf_counter_open will fail if the hw_event specifies a hardware event. To avoid races with other tasks creating and destroying perf_counters, we use a mutex. We use atomic_inc_not_zero and atomic_add_unless to avoid having to take the mutex unless there is a possibility of the count going between 0 and 1. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090330171023.627912475@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index cde720f..560dd1e 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -41,6 +41,8 @@ struct power_pmu *ppmu; */ static unsigned int freeze_counters_kernel = MMCR0_FCS; +static void perf_counter_interrupt(struct pt_regs *regs); + void perf_counter_print_debug(void) { } @@ -594,6 +596,24 @@ struct hw_perf_counter_ops power_perf_ops = { .read = power_perf_read }; +/* Number of perf_counters counting hardware events */ +static atomic_t num_counters; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Release the PMU if this is the last perf_counter. + */ +static void hw_perf_counter_destroy(struct perf_counter *counter) +{ + if (!atomic_add_unless(&num_counters, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_counters) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter) { @@ -601,6 +621,7 @@ hw_perf_counter_init(struct perf_counter *counter) struct perf_counter *ctrs[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; int n; + int err; if (!ppmu) return NULL; @@ -646,6 +667,27 @@ hw_perf_counter_init(struct perf_counter *counter) counter->hw.config = events[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); + + /* + * See if we need to reserve the PMU. + * If no counters are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_counters)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_counters) == 0 && + reserve_pmc_hardware(perf_counter_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_counters); + mutex_unlock(&pmc_reserve_mutex); + } + counter->destroy = hw_perf_counter_destroy; + + if (err) + return NULL; return &power_perf_ops; } @@ -769,11 +811,6 @@ static int init_perf_counters(void) { unsigned long pvr; - if (reserve_pmc_hardware(perf_counter_interrupt)) { - printk(KERN_ERR "Couldn't init performance monitor subsystem\n"); - return -EBUSY; - } - /* XXX should get this from cputable */ pvr = mfspr(SPRN_PVR); switch (PVR_VER(pvr)) { -- cgit v0.10.2 From d5d2bc0dd0379deddb9ede66fec90a3083eaec57 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Mar 2009 19:07:08 +0200 Subject: perf_counter: make it possible for hw_perf_counter_init to return error codes Impact: better error reporting At present, if hw_perf_counter_init encounters an error, all it can do is return NULL, which causes sys_perf_counter_open to return an EINVAL error to userspace. This isn't very informative for userspace; it means that userspace can't tell the difference between "sorry, oprofile is already using the PMU" and "we don't support this CPU" and "this CPU doesn't support the requested generic hardware event". This commit uses the PTR_ERR/ERR_PTR/IS_ERR set of macros to let hw_perf_counter_init return an error code on error rather than just NULL if it wishes. If it does so, that error code will be returned from sys_perf_counter_open to userspace. If it returns NULL, an EINVAL error will be returned to userspace, as before. This also adapts the powerpc hw_perf_counter_init to make use of this to return ENXIO, EINVAL, EBUSY, or EOPNOTSUPP as appropriate. It would be good to add extra error numbers in future to allow userspace to distinguish the various errors that are currently reported as EINVAL, i.e. irq_period < 0, too many events in a group, conflict between exclude_* settings in a group, and PMU resource conflict in a group. [ v2: fix a bug pointed out by Corey Ashford where error returns from hw_perf_counter_init were not handled correctly in the case of raw hardware events.] Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090330171023.682428180@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 560dd1e..0a4d14f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -624,13 +624,13 @@ hw_perf_counter_init(struct perf_counter *counter) int err; if (!ppmu) - return NULL; + return ERR_PTR(-ENXIO); if ((s64)counter->hw_event.irq_period < 0) - return NULL; + return ERR_PTR(-EINVAL); if (!perf_event_raw(&counter->hw_event)) { ev = perf_event_id(&counter->hw_event); if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return NULL; + return ERR_PTR(-EOPNOTSUPP); ev = ppmu->generic_events[ev]; } else { ev = perf_event_config(&counter->hw_event); @@ -656,14 +656,14 @@ hw_perf_counter_init(struct perf_counter *counter) n = collect_events(counter->group_leader, ppmu->n_counter - 1, ctrs, events); if (n < 0) - return NULL; + return ERR_PTR(-EINVAL); } events[n] = ev; ctrs[n] = counter; if (check_excludes(ctrs, n, 1)) - return NULL; + return ERR_PTR(-EINVAL); if (power_check_constraints(events, n + 1)) - return NULL; + return ERR_PTR(-EINVAL); counter->hw.config = events[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); @@ -687,7 +687,7 @@ hw_perf_counter_init(struct perf_counter *counter) counter->destroy = hw_perf_counter_destroy; if (err) - return NULL; + return ERR_PTR(err); return &power_perf_ops; } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f35e89e..d07b452 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2453,10 +2453,11 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, { const struct hw_perf_counter_ops *hw_ops; struct perf_counter *counter; + long err; counter = kzalloc(sizeof(*counter), gfpflags); if (!counter) - return NULL; + return ERR_PTR(-ENOMEM); /* * Single counters are their own group leaders, with an @@ -2505,12 +2506,18 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, hw_ops = tp_perf_counter_init(counter); break; } +done: + err = 0; + if (!hw_ops) + err = -EINVAL; + else if (IS_ERR(hw_ops)) + err = PTR_ERR(hw_ops); - if (!hw_ops) { + if (err) { kfree(counter); - return NULL; + return ERR_PTR(err); } -done: + counter->hw_ops = hw_ops; return counter; @@ -2583,10 +2590,10 @@ SYSCALL_DEFINE5(perf_counter_open, goto err_put_context; } - ret = -EINVAL; counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader, GFP_KERNEL); - if (!counter) + ret = PTR_ERR(counter); + if (IS_ERR(counter)) goto err_put_context; ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); @@ -2658,8 +2665,8 @@ inherit_counter(struct perf_counter *parent_counter, child_counter = perf_counter_alloc(&parent_counter->hw_event, parent_counter->cpu, child_ctx, group_leader, GFP_KERNEL); - if (!child_counter) - return NULL; + if (IS_ERR(child_counter)) + return child_counter; /* * Link it up in the child's context: @@ -2710,15 +2717,17 @@ static int inherit_group(struct perf_counter *parent_counter, { struct perf_counter *leader; struct perf_counter *sub; + struct perf_counter *child_ctr; leader = inherit_counter(parent_counter, parent, parent_ctx, child, NULL, child_ctx); - if (!leader) - return -ENOMEM; + if (IS_ERR(leader)) + return PTR_ERR(leader); list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { - if (!inherit_counter(sub, parent, parent_ctx, - child, leader, child_ctx)) - return -ENOMEM; + child_ctr = inherit_counter(sub, parent, parent_ctx, + child, leader, child_ctx); + if (IS_ERR(child_ctr)) + return PTR_ERR(child_ctr); } return 0; } -- cgit v0.10.2 From 9ea98e191255ee642e64a5745014424fc63f83b0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:09 +0200 Subject: perf_counter: x86: proper error propagation for the x86 hw_perf_counter_init() Now that Paul cleaned up the error propagation paths, pass down the x86 error as well. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.792822360@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 7aab177..b8885cc 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -954,7 +954,7 @@ hw_perf_counter_init(struct perf_counter *counter) err = __hw_perf_counter_init(counter); if (err) - return NULL; + return ERR_PTR(err); return &x86_perf_counter_ops; } -- cgit v0.10.2 From 31f004df8d14212f0a8a2fb12a8ed44a3d80e2fb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 30 Mar 2009 19:07:10 +0200 Subject: perf_counter tools: optionally scale counter values in perfstat mode Impact: new functionality This adds add an option to the perfstat mode of kerneltop to scale the reported counter values according to the fraction of time that each counter gets to count. This is invoked with the -l option (I used 'l' because s, c, a and e were all taken already.) This uses the new PERF_RECORD_TOTAL_TIME_{ENABLED,RUNNING} read format options. With this, we get output like this: $ ./perfstat -l -e 0:0,0:1,0:2,0:3,0:4,0:5 ./spin Performance counter stats for './spin': 4016072055 CPU cycles (events) (scaled from 66.53%) 2005887318 instructions (events) (scaled from 66.53%) 1762849 cache references (events) (scaled from 66.69%) 165229 cache misses (events) (scaled from 66.85%) 1001298009 branches (events) (scaled from 66.78%) 41566 branch misses (events) (scaled from 66.61%) Wall-clock time elapsed: 2438.227446 msecs This also lets us detect when a counter is zero because the counter never got to go on the CPU at all. In that case we print rather than 0. Signed-off-by: Paul Mackerras Signed-off-by: Peter Zijlstra Orig-LKML-Reference: <20090330171023.871484899@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 995111d..c0ca015 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -197,6 +197,8 @@ static int delay_secs = 2; static int zero; static int dump_symtab; +static int scale; + struct source_line { uint64_t EIP; unsigned long count; @@ -305,6 +307,7 @@ static void display_perfstat_help(void) display_events_help(); printf( + " -l # scale counter values\n" " -a # system-wide collection\n"); exit(0); } @@ -328,6 +331,7 @@ static void display_help(void) " -c CNT --count=CNT # event period to sample\n\n" " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" + " -l # show scale factor for RR events\n" " -d delay --delay= # sampling/display delay [default: 2]\n" " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" " -s symbol --symbol= # function to be showed annotated one-shot\n" @@ -436,6 +440,9 @@ static void create_perfstat_counter(int counter) hw_event.config = event_id[counter]; hw_event.record_type = PERF_RECORD_SIMPLE; hw_event.nmi = 0; + if (scale) + hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; if (system_wide) { int cpu; @@ -507,28 +514,53 @@ int do_perfstat(int argc, char *argv[]) fprintf(stderr, "\n"); for (counter = 0; counter < nr_counters; counter++) { - int cpu; - __u64 count, single_count; + int cpu, nv; + __u64 count[3], single_count[3]; + int scaled; - count = 0; + count[0] = count[1] = count[2] = 0; + nv = scale ? 3 : 1; for (cpu = 0; cpu < nr_cpus; cpu ++) { res = read(fd[cpu][counter], - (char *) &single_count, sizeof(single_count)); - assert(res == sizeof(single_count)); - count += single_count; + single_count, nv * sizeof(__u64)); + assert(res == nv * sizeof(__u64)); + + count[0] += single_count[0]; + if (scale) { + count[1] += single_count[1]; + count[2] += single_count[2]; + } + } + + scaled = 0; + if (scale) { + if (count[2] == 0) { + fprintf(stderr, " %14s %-20s\n", + "", event_name(counter)); + continue; + } + if (count[2] < count[1]) { + scaled = 1; + count[0] = (unsigned long long) + ((double)count[0] * count[1] / count[2] + 0.5); + } } if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { - double msecs = (double)count / 1000000; + double msecs = (double)count[0] / 1000000; - fprintf(stderr, " %14.6f %-20s (msecs)\n", + fprintf(stderr, " %14.6f %-20s (msecs)", msecs, event_name(counter)); } else { - fprintf(stderr, " %14Ld %-20s (events)\n", - count, event_name(counter)); + fprintf(stderr, " %14Ld %-20s (events)", + count[0], event_name(counter)); } + if (scaled) + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); } fprintf(stderr, "\n"); fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", @@ -1049,6 +1081,7 @@ static void process_options(int argc, char *argv[]) {"filter", required_argument, NULL, 'f'}, {"group", required_argument, NULL, 'g'}, {"help", no_argument, NULL, 'h'}, + {"scale", no_argument, NULL, 'l'}, {"nmi", required_argument, NULL, 'n'}, {"pid", required_argument, NULL, 'p'}, {"vmlinux", required_argument, NULL, 'x'}, @@ -1060,7 +1093,7 @@ static void process_options(int argc, char *argv[]) {"munmap_info", no_argument, NULL, 'U'}, {NULL, 0, NULL, 0 } }; - int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:zMU", + int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU", long_options, &option_index); if (c == -1) break; @@ -1084,6 +1117,7 @@ static void process_options(int argc, char *argv[]) case 'f': count_filter = atoi(optarg); break; case 'g': group = atoi(optarg); break; case 'h': display_help(); break; + case 'l': scale = 1; break; case 'n': nmi = atoi(optarg); break; case 'p': /* CPU and PID are mutually exclusive */ -- cgit v0.10.2 From 78d613eb129fc4edf0e2cabfcc6a4c5285482d21 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:11 +0200 Subject: perf_counter: small cleanup of the output routines Move the nmi argument to the _begin() function, so that _end() only needs the handle. This allows the _begin() function to generate a wakeup on event loss. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171023.959404268@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d07b452..4471e7e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1663,10 +1663,20 @@ struct perf_output_handle { unsigned int offset; unsigned int head; int wakeup; + int nmi; }; +static inline void __perf_output_wakeup(struct perf_output_handle *handle) +{ + if (handle->nmi) + perf_pending_queue(handle->counter); + else + perf_counter_wakeup(handle->counter); +} + static int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size) + struct perf_counter *counter, unsigned int size, + int nmi) { struct perf_mmap_data *data; unsigned int offset, head; @@ -1676,15 +1686,17 @@ static int perf_output_begin(struct perf_output_handle *handle, if (!data) goto out; + handle->counter = counter; + handle->nmi = nmi; + if (!data->nr_pages) - goto out; + goto fail; do { offset = head = atomic_read(&data->head); head += size; } while (atomic_cmpxchg(&data->head, offset, head) != offset); - handle->counter = counter; handle->data = data; handle->offset = offset; handle->head = head; @@ -1692,6 +1704,8 @@ static int perf_output_begin(struct perf_output_handle *handle, return 0; +fail: + __perf_output_wakeup(handle); out: rcu_read_unlock(); @@ -1733,14 +1747,10 @@ static void perf_output_copy(struct perf_output_handle *handle, #define perf_output_put(handle, x) \ perf_output_copy((handle), &(x), sizeof(x)) -static void perf_output_end(struct perf_output_handle *handle, int nmi) +static void perf_output_end(struct perf_output_handle *handle) { - if (handle->wakeup) { - if (nmi) - perf_pending_queue(handle->counter); - else - perf_counter_wakeup(handle->counter); - } + if (handle->wakeup) + __perf_output_wakeup(handle); rcu_read_unlock(); } @@ -1750,12 +1760,12 @@ static int perf_output_write(struct perf_counter *counter, int nmi, struct perf_output_handle handle; int ret; - ret = perf_output_begin(&handle, counter, size); + ret = perf_output_begin(&handle, counter, size, nmi); if (ret) goto out; perf_output_copy(&handle, buf, size); - perf_output_end(&handle, nmi); + perf_output_end(&handle); out: return ret; @@ -1804,7 +1814,7 @@ static void perf_output_group(struct perf_counter *counter, int nmi) size = sizeof(header) + counter->nr_siblings * sizeof(entry); - ret = perf_output_begin(&handle, counter, size); + ret = perf_output_begin(&handle, counter, size, nmi); if (ret) return; @@ -1824,7 +1834,7 @@ static void perf_output_group(struct perf_counter *counter, int nmi) perf_output_put(&handle, entry); } - perf_output_end(&handle, nmi); + perf_output_end(&handle); } void perf_counter_output(struct perf_counter *counter, @@ -1869,7 +1879,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter, { struct perf_output_handle handle; int size = mmap_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size); + int ret = perf_output_begin(&handle, counter, size, 0); if (ret) return; @@ -1877,7 +1887,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter, perf_output_put(&handle, mmap_event->event); perf_output_copy(&handle, mmap_event->file_name, mmap_event->file_size); - perf_output_end(&handle, 0); + perf_output_end(&handle); } static int perf_counter_mmap_match(struct perf_counter *counter, -- cgit v0.10.2 From 5ed00415e304203a0a9dcaef226d6d3f1106070e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:12 +0200 Subject: perf_counter: re-arrange the perf_event_type Breaks ABI yet again :-) Change the event type so that [0, 2^31-1] are regular event types, but [2^31, 2^32-1] forms a bitmask for overflow events. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.047961770@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 037a811..edf5bfb 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -210,13 +210,15 @@ struct perf_event_header { }; enum perf_event_type { - PERF_EVENT_IP = 0, + PERF_EVENT_GROUP = 1, PERF_EVENT_MMAP = 2, PERF_EVENT_MUNMAP = 3, - __PERF_EVENT_TID = 0x100, + PERF_EVENT_OVERFLOW = 1UL << 31, + __PERF_EVENT_IP = 1UL << 30, + __PERF_EVENT_TID = 1UL << 29, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4471e7e..d93e9dd 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1754,50 +1754,44 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -static int perf_output_write(struct perf_counter *counter, int nmi, - void *buf, ssize_t size) -{ - struct perf_output_handle handle; - int ret; - - ret = perf_output_begin(&handle, counter, size, nmi); - if (ret) - goto out; - - perf_output_copy(&handle, buf, size); - perf_output_end(&handle); - -out: - return ret; -} - static void perf_output_simple(struct perf_counter *counter, int nmi, struct pt_regs *regs) { - unsigned int size; + int ret; + struct perf_output_handle handle; + struct perf_event_header header; + u64 ip; struct { - struct perf_event_header header; - u64 ip; u32 pid, tid; - } event; + } tid_entry; - event.header.type = PERF_EVENT_IP; - event.ip = instruction_pointer(regs); + header.type = PERF_EVENT_OVERFLOW; + header.size = sizeof(header); - size = sizeof(event); + ip = instruction_pointer(regs); + header.type |= __PERF_EVENT_IP; + header.size += sizeof(ip); if (counter->hw_event.include_tid) { /* namespace issues */ - event.pid = current->group_leader->pid; - event.tid = current->pid; + tid_entry.pid = current->group_leader->pid; + tid_entry.tid = current->pid; - event.header.type |= __PERF_EVENT_TID; - } else - size -= sizeof(u64); + header.type |= __PERF_EVENT_TID; + header.size += sizeof(tid_entry); + } - event.header.size = size; + ret = perf_output_begin(&handle, counter, header.size, nmi); + if (ret) + return; + + perf_output_put(&handle, header); + perf_output_put(&handle, ip); + + if (counter->hw_event.include_tid) + perf_output_put(&handle, tid_entry); - perf_output_write(counter, nmi, &event, size); + perf_output_end(&handle); } static void perf_output_group(struct perf_counter *counter, int nmi) -- cgit v0.10.2 From 023c54c42288416b4f43c67bfd5049a76926fad6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:13 +0200 Subject: perf_counter tools: kerneltop: update event_types Go along with the new perf_event_type ABI. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.133985461@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index c0ca015..430810d 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -1263,8 +1263,8 @@ static void mmap_read(struct mmap_data *md) old += size; switch (event->header.type) { - case PERF_EVENT_IP: - case PERF_EVENT_IP | __PERF_EVENT_TID: + case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP: + case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID: process_event(event->ip.ip, md->counter); break; -- cgit v0.10.2 From 394ee07623cf556c8daae2b3c00cf5fea47f0811 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:14 +0200 Subject: perf_counter: provide generic callchain bits Provide the generic callchain support bits. If hw_event->callchain is set the arch specific perf_callchain() function is called upon to provide a perf_callchain_entry structure filled with the current callchain. If it does so, it is added to the overflow output event. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.254266860@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index edf5bfb..43083af 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -140,8 +140,9 @@ struct perf_counter_hw_event { include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ + callchain : 1, /* add callchain data */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 extra_config_len; __u32 __reserved_4; @@ -219,6 +220,7 @@ enum perf_event_type { PERF_EVENT_OVERFLOW = 1UL << 31, __PERF_EVENT_IP = 1UL << 30, __PERF_EVENT_TID = 1UL << 29, + __PERF_EVENT_CALLCHAIN = 1UL << 28, }; #ifdef __KERNEL__ @@ -504,6 +506,15 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); +#define MAX_STACK_DEPTH 255 + +struct perf_callchain_entry { + u64 nr; + u64 ip[MAX_STACK_DEPTH]; +}; + +extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index d93e9dd..860cdc2 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1654,6 +1654,17 @@ void perf_counter_do_pending(void) } /* + * Callchain support -- arch specific + */ + +struct perf_callchain_entry * +__attribute__((weak)) +perf_callchain(struct pt_regs *regs) +{ + return NULL; +} + +/* * Output */ @@ -1764,6 +1775,8 @@ static void perf_output_simple(struct perf_counter *counter, struct { u32 pid, tid; } tid_entry; + struct perf_callchain_entry *callchain = NULL; + int callchain_size = 0; header.type = PERF_EVENT_OVERFLOW; header.size = sizeof(header); @@ -1781,6 +1794,17 @@ static void perf_output_simple(struct perf_counter *counter, header.size += sizeof(tid_entry); } + if (counter->hw_event.callchain) { + callchain = perf_callchain(regs); + + if (callchain) { + callchain_size = (1 + callchain->nr) * sizeof(u64); + + header.type |= __PERF_EVENT_CALLCHAIN; + header.size += callchain_size; + } + } + ret = perf_output_begin(&handle, counter, header.size, nmi); if (ret) return; @@ -1791,6 +1815,9 @@ static void perf_output_simple(struct perf_counter *counter, if (counter->hw_event.include_tid) perf_output_put(&handle, tid_entry); + if (callchain) + perf_output_copy(&handle, callchain, callchain_size); + perf_output_end(&handle); } -- cgit v0.10.2 From d7d59fb323833682b117b528d77eeb8ef587036a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:15 +0200 Subject: perf_counter: x86: callchain support Provide the x86 perf_callchain() implementation. Code based on the ftrace/sysprof code from Soeren Sandmann Pedersen. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Cc: Soeren Sandmann Pedersen Cc: Frederic Weisbecker Cc: Steven Rostedt Orig-LKML-Reference: <20090330171024.341993293@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index b8885cc..e16dfaf 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -16,8 +16,10 @@ #include #include #include +#include #include +#include static bool perf_counters_initialized __read_mostly; @@ -958,3 +960,155 @@ hw_perf_counter_init(struct perf_counter *counter) return &x86_perf_counter_ops; } + +/* + * callchain support + */ + +static inline +void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) +{ + if (entry->nr < MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} + +static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); +static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); + + +static void +backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + /* Ignore warnings */ +} + +static void backtrace_warning(void *data, char *msg) +{ + /* Ignore warnings */ +} + +static int backtrace_stack(void *data, char *name) +{ + /* Don't bother with IRQ stacks for now */ + return -1; +} + +static void backtrace_address(void *data, unsigned long addr, int reliable) +{ + struct perf_callchain_entry *entry = data; + + if (reliable) + callchain_store(entry, addr); +} + +static const struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, + .address = backtrace_address, +}; + +static void +perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + unsigned long bp; + char *stack; + + callchain_store(entry, instruction_pointer(regs)); + + stack = ((char *)regs + sizeof(struct pt_regs)); +#ifdef CONFIG_FRAME_POINTER + bp = frame_pointer(regs); +#else + bp = 0; +#endif + + dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); +} + + +struct stack_frame { + const void __user *next_fp; + unsigned long return_address; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + int ret; + + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); + + return ret; +} + +static void +perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + struct stack_frame frame; + const void __user *fp; + + regs = (struct pt_regs *)current->thread.sp0 - 1; + fp = (void __user *)regs->bp; + + callchain_store(entry, regs->ip); + + while (entry->nr < MAX_STACK_DEPTH) { + frame.next_fp = NULL; + frame.return_address = 0; + + if (!copy_stack_frame(fp, &frame)) + break; + + if ((unsigned long)fp < user_stack_pointer(regs)) + break; + + callchain_store(entry, frame.return_address); + fp = frame.next_fp; + } +} + +static void +perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) +{ + int is_user; + + if (!regs) + return; + + is_user = user_mode(regs); + + if (!current || current->pid == 0) + return; + + if (is_user && current->state != TASK_RUNNING) + return; + + if (!is_user) + perf_callchain_kernel(regs, entry); + + if (current->mm) + perf_callchain_user(regs, entry); +} + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry; + + if (in_nmi()) + entry = &__get_cpu_var(nmi_entry); + else + entry = &__get_cpu_var(irq_entry); + + entry->nr = 0; + + perf_do_callchain(regs, entry); + + return entry; +} -- cgit v0.10.2 From 4e935e47177c3b26cf383e79849bae2a464d0160 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Mar 2009 19:07:16 +0200 Subject: perf_counter: pmc arbitration Follow the example set by powerpc and try to play nice with oprofile and the nmi watchdog. Signed-off-by: Peter Zijlstra Acked-by: Paul Mackerras Orig-LKML-Reference: <20090330171024.459968444@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index e16dfaf..2a946a1 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -20,6 +20,7 @@ #include #include +#include static bool perf_counters_initialized __read_mostly; @@ -172,6 +173,65 @@ again: atomic64_sub(delta, &hwc->period_left); } +static atomic_t num_counters; +static DEFINE_MUTEX(pmc_reserve_mutex); + +static bool reserve_pmc_hardware(void) +{ + int i; + + if (nmi_watchdog == NMI_LOCAL_APIC) + disable_lapic_nmi_watchdog(); + + for (i = 0; i < nr_counters_generic; i++) { + if (!reserve_perfctr_nmi(pmc_ops->perfctr + i)) + goto perfctr_fail; + } + + for (i = 0; i < nr_counters_generic; i++) { + if (!reserve_evntsel_nmi(pmc_ops->eventsel + i)) + goto eventsel_fail; + } + + return true; + +eventsel_fail: + for (i--; i >= 0; i--) + release_evntsel_nmi(pmc_ops->eventsel + i); + + i = nr_counters_generic; + +perfctr_fail: + for (i--; i >= 0; i--) + release_perfctr_nmi(pmc_ops->perfctr + i); + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); + + return false; +} + +static void release_pmc_hardware(void) +{ + int i; + + for (i = 0; i < nr_counters_generic; i++) { + release_perfctr_nmi(pmc_ops->perfctr + i); + release_evntsel_nmi(pmc_ops->eventsel + i); + } + + if (nmi_watchdog == NMI_LOCAL_APIC) + enable_lapic_nmi_watchdog(); +} + +static void hw_perf_counter_destroy(struct perf_counter *counter) +{ + if (atomic_dec_and_mutex_lock(&num_counters, &pmc_reserve_mutex)) { + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + /* * Setup the hardware configuration for a given hw_event_type */ @@ -179,10 +239,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_hw_event *hw_event = &counter->hw_event; struct hw_perf_counter *hwc = &counter->hw; + int err; if (unlikely(!perf_counters_initialized)) return -EINVAL; + err = 0; + if (atomic_inc_not_zero(&num_counters)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_counters) == 0 && !reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_counters); + mutex_unlock(&pmc_reserve_mutex); + } + if (err) + return err; + /* * Generate PMC IRQs: * (keep 'enabled' bit clear for now) @@ -230,6 +303,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); } + counter->destroy = hw_perf_counter_destroy; + return 0; } -- cgit v0.10.2 From 9dd499889bdb12ac0e412ccdd718fe0d348258f2 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 27 Mar 2009 12:13:43 +0100 Subject: perf_counter tools: kerneltop: add real-time data acquisition thread Decouple kerneltop display from event acquisition by introducing a separate data acquisition thread. This fixes annnoying kerneltop display refresh jitter and missed events. Also add a -r option, to switch the data acquisition thread to real-time priority. Signed-off-by: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Orig-LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 430810d..33b4fcf 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -77,6 +77,8 @@ #include #include #include +#include +#include #include #include @@ -181,6 +183,7 @@ static int tid = -1; static int profile_cpu = -1; static int nr_cpus = 0; static int nmi = 1; +static unsigned int realtime_prio = 0; static int group = 0; static unsigned int page_size; static unsigned int mmap_pages = 16; @@ -334,6 +337,7 @@ static void display_help(void) " -l # show scale factor for RR events\n" " -d delay --delay= # sampling/display delay [default: 2]\n" " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" + " -r prio --realtime= # event acquisition runs with SCHED_FIFO policy\n" " -s symbol --symbol= # function to be showed annotated one-shot\n" " -x path --vmlinux= # the vmlinux binary, required for -s use\n" " -z --zero # zero counts after display\n" @@ -620,7 +624,6 @@ static int compare(const void *__sym1, const void *__sym2) return sym_weight(sym1) < sym_weight(sym2); } -static time_t last_refresh; static long events; static long userspace_events; static const char CONSOLE_CLEAR[] = ""; @@ -634,6 +637,7 @@ static void print_sym_table(void) float events_per_sec = events/delay_secs; float kevents_per_sec = (events-userspace_events)/delay_secs; + events = userspace_events = 0; memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); @@ -714,8 +718,6 @@ static void print_sym_table(void) if (sym_filter_entry) show_details(sym_filter_entry); - last_refresh = time(NULL); - { struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; @@ -726,6 +728,16 @@ static void print_sym_table(void) } } +static void *display_thread(void *arg) +{ + printf("KernelTop refresh period: %d seconds\n", delay_secs); + + while (!sleep(delay_secs)) + print_sym_table(); + + return NULL; +} + static int read_symbol(FILE *in, struct sym_entry *s) { static int filter_match = 0; @@ -1081,19 +1093,20 @@ static void process_options(int argc, char *argv[]) {"filter", required_argument, NULL, 'f'}, {"group", required_argument, NULL, 'g'}, {"help", no_argument, NULL, 'h'}, - {"scale", no_argument, NULL, 'l'}, {"nmi", required_argument, NULL, 'n'}, + {"mmap_info", no_argument, NULL, 'M'}, + {"mmap_pages", required_argument, NULL, 'm'}, + {"munmap_info", no_argument, NULL, 'U'}, {"pid", required_argument, NULL, 'p'}, - {"vmlinux", required_argument, NULL, 'x'}, + {"realtime", required_argument, NULL, 'r'}, + {"scale", no_argument, NULL, 'l'}, {"symbol", required_argument, NULL, 's'}, {"stat", no_argument, NULL, 'S'}, + {"vmlinux", required_argument, NULL, 'x'}, {"zero", no_argument, NULL, 'z'}, - {"mmap_pages", required_argument, NULL, 'm'}, - {"mmap_info", no_argument, NULL, 'M'}, - {"munmap_info", no_argument, NULL, 'U'}, {NULL, 0, NULL, 0 } }; - int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:s:Sx:zMU", + int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU", long_options, &option_index); if (c == -1) break; @@ -1127,6 +1140,7 @@ static void process_options(int argc, char *argv[]) profile_cpu = -1; } tid = atoi(optarg); break; + case 'r': realtime_prio = atoi(optarg); break; case 's': sym_filter = strdup(optarg); break; case 'S': run_perfstat = 1; break; case 'x': vmlinux = strdup(optarg); break; @@ -1289,6 +1303,7 @@ int main(int argc, char *argv[]) struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; struct perf_counter_hw_event hw_event; + pthread_t thread; int i, counter, group_fd, nr_poll = 0; unsigned int cpu; int ret; @@ -1363,8 +1378,20 @@ int main(int argc, char *argv[]) } } - printf("KernelTop refresh period: %d seconds\n", delay_secs); - last_refresh = time(NULL); + if (pthread_create(&thread, NULL, display_thread, NULL)) { + printf("Could not create display thread.\n"); + exit(-1); + } + + if (realtime_prio) { + struct sched_param param; + + param.sched_priority = realtime_prio; + if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { + printf("Could not set realtime priority.\n"); + exit(-1); + } + } while (1) { int hits = events; @@ -1374,14 +1401,8 @@ int main(int argc, char *argv[]) mmap_read(&mmap_array[i][counter]); } - if (time(NULL) >= last_refresh + delay_secs) { - print_sym_table(); - events = userspace_events = 0; - } - if (hits == events) - ret = poll(event_array, nr_poll, 1000); - hits = events; + ret = poll(event_array, nr_poll, 100); } return 0; -- cgit v0.10.2 From 8a057d84912f36e53f970c4d177cb4bb6b2f9e08 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:11:59 +0200 Subject: perf_counter: move the event overflow output bits to record_type Per suggestion from Paul, move the event overflow bits to record_type and sanitize the enums a bit. Breaks the ABI -- again ;-) Suggested-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.151921176@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 43083af..06a6fba 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -73,15 +73,6 @@ enum sw_event_ids { PERF_SW_EVENTS_MAX = 7, }; -/* - * IRQ-notification data record type: - */ -enum perf_counter_record_type { - PERF_RECORD_SIMPLE = 0, - PERF_RECORD_IRQ = 1, - PERF_RECORD_GROUP = 2, -}; - #define __PERF_COUNTER_MASK(name) \ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ PERF_COUNTER_##name##_SHIFT) @@ -103,6 +94,17 @@ enum perf_counter_record_type { #define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) /* + * Bits that can be set in hw_event.record_type to request information + * in the overflow packets. + */ +enum perf_counter_record_format { + PERF_RECORD_IP = 1U << 0, + PERF_RECORD_TID = 1U << 1, + PERF_RECORD_GROUP = 1U << 2, + PERF_RECORD_CALLCHAIN = 1U << 3, +}; + +/* * Bits that can be set in hw_event.read_format to request that * reads on the counter should return the indicated quantities, * in increasing order of bit value, after the counter value. @@ -125,8 +127,8 @@ struct perf_counter_hw_event { __u64 config; __u64 irq_period; - __u64 record_type; - __u64 read_format; + __u32 record_type; + __u32 read_format; __u64 disabled : 1, /* off by default */ nmi : 1, /* NMI sampling */ @@ -137,12 +139,10 @@ struct perf_counter_hw_event { exclude_kernel : 1, /* ditto kernel */ exclude_hv : 1, /* ditto hypervisor */ exclude_idle : 1, /* don't count when idle */ - include_tid : 1, /* include the tid */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ - callchain : 1, /* add callchain data */ - __reserved_1 : 51; + __reserved_1 : 53; __u32 extra_config_len; __u32 __reserved_4; @@ -212,15 +212,21 @@ struct perf_event_header { enum perf_event_type { - PERF_EVENT_GROUP = 1, - - PERF_EVENT_MMAP = 2, - PERF_EVENT_MUNMAP = 3, + PERF_EVENT_MMAP = 1, + PERF_EVENT_MUNMAP = 2, - PERF_EVENT_OVERFLOW = 1UL << 31, - __PERF_EVENT_IP = 1UL << 30, - __PERF_EVENT_TID = 1UL << 29, - __PERF_EVENT_CALLCHAIN = 1UL << 28, + /* + * Half the event type space is reserved for the counter overflow + * bitfields, as found in hw_event.record_type. + * + * These events will have types of the form: + * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + */ + PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, + __PERF_EVENT_IP = PERF_RECORD_IP, + __PERF_EVENT_TID = PERF_RECORD_TID, + __PERF_EVENT_GROUP = PERF_RECORD_GROUP, + __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 860cdc2..995063d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1765,27 +1765,34 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -static void perf_output_simple(struct perf_counter *counter, - int nmi, struct pt_regs *regs) +void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) { int ret; + u64 record_type = counter->hw_event.record_type; struct perf_output_handle handle; struct perf_event_header header; u64 ip; struct { u32 pid, tid; } tid_entry; + struct { + u64 event; + u64 counter; + } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; - header.type = PERF_EVENT_OVERFLOW; + header.type = PERF_EVENT_COUNTER_OVERFLOW; header.size = sizeof(header); - ip = instruction_pointer(regs); - header.type |= __PERF_EVENT_IP; - header.size += sizeof(ip); + if (record_type & PERF_RECORD_IP) { + ip = instruction_pointer(regs); + header.type |= __PERF_EVENT_IP; + header.size += sizeof(ip); + } - if (counter->hw_event.include_tid) { + if (record_type & PERF_RECORD_TID) { /* namespace issues */ tid_entry.pid = current->group_leader->pid; tid_entry.tid = current->pid; @@ -1794,7 +1801,13 @@ static void perf_output_simple(struct perf_counter *counter, header.size += sizeof(tid_entry); } - if (counter->hw_event.callchain) { + if (record_type & PERF_RECORD_GROUP) { + header.type |= __PERF_EVENT_GROUP; + header.size += sizeof(u64) + + counter->nr_siblings * sizeof(group_entry); + } + + if (record_type & PERF_RECORD_CALLCHAIN) { callchain = perf_callchain(regs); if (callchain) { @@ -1810,69 +1823,35 @@ static void perf_output_simple(struct perf_counter *counter, return; perf_output_put(&handle, header); - perf_output_put(&handle, ip); - if (counter->hw_event.include_tid) - perf_output_put(&handle, tid_entry); + if (record_type & PERF_RECORD_IP) + perf_output_put(&handle, ip); - if (callchain) - perf_output_copy(&handle, callchain, callchain_size); - - perf_output_end(&handle); -} - -static void perf_output_group(struct perf_counter *counter, int nmi) -{ - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_counter *leader, *sub; - unsigned int size; - struct { - u64 event; - u64 counter; - } entry; - int ret; - - size = sizeof(header) + counter->nr_siblings * sizeof(entry); + if (record_type & PERF_RECORD_TID) + perf_output_put(&handle, tid_entry); - ret = perf_output_begin(&handle, counter, size, nmi); - if (ret) - return; + if (record_type & PERF_RECORD_GROUP) { + struct perf_counter *leader, *sub; + u64 nr = counter->nr_siblings; - header.type = PERF_EVENT_GROUP; - header.size = size; + perf_output_put(&handle, nr); - perf_output_put(&handle, header); + leader = counter->group_leader; + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + if (sub != counter) + sub->hw_ops->read(sub); - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->hw_ops->read(sub); + group_entry.event = sub->hw_event.config; + group_entry.counter = atomic64_read(&sub->count); - entry.event = sub->hw_event.config; - entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, entry); + perf_output_put(&handle, group_entry); + } } - perf_output_end(&handle); -} - -void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) -{ - switch (counter->hw_event.record_type) { - case PERF_RECORD_SIMPLE: - return; - - case PERF_RECORD_IRQ: - perf_output_simple(counter, nmi, regs); - break; + if (callchain) + perf_output_copy(&handle, callchain, callchain_size); - case PERF_RECORD_GROUP: - perf_output_group(counter, nmi); - break; - } + perf_output_end(&handle); } /* -- cgit v0.10.2 From c457810ab4a825161aec6ef71b581e1bc8febd1a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:01 +0200 Subject: perf_counter: per event wakeups By request, provide a way to request a wakeup every 'n' events instead of every page of output. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.323309784@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 06a6fba..5428ba1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -145,7 +145,7 @@ struct perf_counter_hw_event { __reserved_1 : 53; __u32 extra_config_len; - __u32 __reserved_4; + __u32 wakeup_events; /* wakeup every n events */ __u64 __reserved_2; __u64 __reserved_3; @@ -321,6 +321,7 @@ struct perf_mmap_data { int nr_pages; atomic_t wakeup; atomic_t head; + atomic_t events; struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 995063d..9bcab10 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1760,7 +1760,15 @@ static void perf_output_copy(struct perf_output_handle *handle, static void perf_output_end(struct perf_output_handle *handle) { - if (handle->wakeup) + int wakeup_events = handle->counter->hw_event.wakeup_events; + + if (wakeup_events) { + int events = atomic_inc_return(&handle->data->events); + if (events >= wakeup_events) { + atomic_sub(wakeup_events, &handle->data->events); + __perf_output_wakeup(handle); + } + } else if (handle->wakeup) __perf_output_wakeup(handle); rcu_read_unlock(); } -- cgit v0.10.2 From 3df70fd623bb109e0079e697c0276d220a4b7908 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:02 +0200 Subject: perf_counter: kerneltop: update to new ABI Update to reflect the new record_type ABI changes. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.407283141@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 33b4fcf..4f8d791 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -442,7 +442,7 @@ static void create_perfstat_counter(int counter) memset(&hw_event, 0, sizeof(hw_event)); hw_event.config = event_id[counter]; - hw_event.record_type = PERF_RECORD_SIMPLE; + hw_event.record_type = 0; hw_event.nmi = 0; if (scale) hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | @@ -1277,8 +1277,8 @@ static void mmap_read(struct mmap_data *md) old += size; switch (event->header.type) { - case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP: - case PERF_EVENT_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID: + case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP: + case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID: process_event(event->ip.ip, md->counter); break; @@ -1337,9 +1337,8 @@ int main(int argc, char *argv[]) memset(&hw_event, 0, sizeof(hw_event)); hw_event.config = event_id[counter]; hw_event.irq_period = event_count[counter]; - hw_event.record_type = PERF_RECORD_IRQ; + hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; hw_event.nmi = nmi; - hw_event.include_tid = 1; hw_event.mmap = use_mmap; hw_event.munmap = use_munmap; -- cgit v0.10.2 From 5872bdb88a35fae7d224bd6b21e5f377e854ccfc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:03 +0200 Subject: perf_counter: add more context information Put in counts to tell which ips belong to what context. ----- | | hv | -- nr | | kernel | -- | | user ----- Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.493101305@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2a946a1..c74e20d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1088,6 +1088,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) { unsigned long bp; char *stack; + int nr = entry->nr; callchain_store(entry, instruction_pointer(regs)); @@ -1099,6 +1100,8 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) #endif dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); + + entry->kernel = entry->nr - nr; } @@ -1128,6 +1131,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) { struct stack_frame frame; const void __user *fp; + int nr = entry->nr; regs = (struct pt_regs *)current->thread.sp0 - 1; fp = (void __user *)regs->bp; @@ -1147,6 +1151,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) callchain_store(entry, frame.return_address); fp = frame.next_fp; } + + entry->user = entry->nr - nr; } static void @@ -1182,6 +1188,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) entry = &__get_cpu_var(irq_entry); entry->nr = 0; + entry->hv = 0; + entry->kernel = 0; + entry->user = 0; perf_do_callchain(regs, entry); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5428ba1..90cce0c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -513,10 +513,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -#define MAX_STACK_DEPTH 255 +#define MAX_STACK_DEPTH 254 struct perf_callchain_entry { - u64 nr; + u32 nr, hv, kernel, user; u64 ip[MAX_STACK_DEPTH]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 9bcab10..f105a6e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1819,7 +1819,7 @@ void perf_counter_output(struct perf_counter *counter, callchain = perf_callchain(regs); if (callchain) { - callchain_size = (1 + callchain->nr) * sizeof(u64); + callchain_size = (2 + callchain->nr) * sizeof(u64); header.type |= __PERF_EVENT_CALLCHAIN; header.size += callchain_size; -- cgit v0.10.2 From 92f22a3865abe87eea2609a6f8e5be5123f7ce4f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 2 Apr 2009 11:12:04 +0200 Subject: perf_counter: update mmap() counter read Paul noted that we don't need SMP barriers for the mmap() counter read because its always on the same cpu (otherwise you can't access the hw counter anyway). So remove the SMP barriers and replace them with regular compiler barriers. Further, update the comment to include a race free method of reading said hardware counter. The primary change is putting the pmc_read inside the seq-loop, otherwise we can still race and read rubbish. Noticed-by: Paul Mackerras Signed-off-by: Peter Zijlstra Cc: Corey Ashford Orig-LKML-Reference: <20090402091319.577951445@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 90cce0c..f2b914d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -167,30 +167,28 @@ struct perf_counter_mmap_page { /* * Bits needed to read the hw counters in user-space. * - * The index and offset should be read atomically using the seqlock: - * - * __u32 seq, index; - * __s64 offset; + * u32 seq; + * s64 count; * * again: - * rmb(); * seq = pc->lock; - * * if (unlikely(seq & 1)) { * cpu_relax(); * goto again; * } * - * index = pc->index; - * offset = pc->offset; + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; * - * rmb(); + * barrier(); * if (pc->lock != seq) * goto again; * - * After this, index contains architecture specific counter index + 1, - * so that 0 means unavailable, offset contains the value to be added - * to the result of the raw timer read to obtain this counter's value. + * NOTE: for obvious reason this only works on self-monitoring + * processes. */ __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f105a6e..2a5d4f5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1340,13 +1340,13 @@ void perf_counter_update_userpage(struct perf_counter *counter) */ preempt_disable(); ++userpg->lock; - smp_wmb(); + barrier(); userpg->index = counter->hw.idx; userpg->offset = atomic64_read(&counter->count); if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); - smp_wmb(); + barrier(); ++userpg->lock; preempt_enable(); unlock: -- cgit v0.10.2 From ca5f9524d61f54b1f618293ab92fc6b49cac864d Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:39:33 -0700 Subject: futex: separate futex_wait_queue_me() logic from futex_wait() Refactor futex_wait() in preparation for futex_wait_requeue_pi(). In order to reuse a good chunk of the futex_wait() code for the upcoming futex_wait_requeue_pi() function, this patch breaks out the queue-to-wakeup section of futex_wait() into futex_wait_queue_me(). Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 6b50a02..ebb48d6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1115,24 +1115,87 @@ handle_fault: static long futex_wait_restart(struct restart_block *restart); +/** + * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal + * @hb: the futex hash bucket, must be locked by the caller + * @q: the futex_q to queue up on + * @timeout: the prepared hrtimer_sleeper, or null for no timeout + * @wait: the wait_queue to add to the futex_q after queueing in the hb + */ +static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, + struct hrtimer_sleeper *timeout, + wait_queue_t *wait) +{ + queue_me(q, hb); + + /* + * There might have been scheduling since the queue_me(), as we + * cannot hold a spinlock across the get_user() in case it + * faults, and we cannot just set TASK_INTERRUPTIBLE state when + * queueing ourselves into the futex hash. This code thus has to + * rely on the futex_wake() code removing us from hash when it + * wakes us up. + */ + + /* add_wait_queue is the barrier after __set_current_state. */ + __set_current_state(TASK_INTERRUPTIBLE); + + /* + * Add current as the futex_q waiter. We don't remove ourselves from + * the wait_queue because we are the only user of it. + */ + add_wait_queue(&q->waiter, wait); + + /* Arm the timer */ + if (timeout) { + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } + + /* + * !plist_node_empty() is safe here without any lock. + * q.lock_ptr != 0 is not safe, because of ordering against wakeup. + */ + if (likely(!plist_node_empty(&q->list))) { + /* + * If the timer has already expired, current will already be + * flagged for rescheduling. Only call schedule if there + * is no timeout, or if it has yet to expire. + */ + if (!timeout || timeout->task) + schedule(); + } + __set_current_state(TASK_RUNNING); +} + static int futex_wait(u32 __user *uaddr, int fshared, u32 val, ktime_t *abs_time, u32 bitset, int clockrt) { - struct task_struct *curr = current; + struct hrtimer_sleeper timeout, *to = NULL; + DECLARE_WAITQUEUE(wait, current); struct restart_block *restart; - DECLARE_WAITQUEUE(wait, curr); struct futex_hash_bucket *hb; struct futex_q q; u32 uval; int ret; - struct hrtimer_sleeper t; - int rem = 0; if (!bitset) return -EINVAL; q.pi_state = NULL; q.bitset = bitset; + + if (abs_time) { + to = &timeout; + + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } + retry: q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); @@ -1178,75 +1241,22 @@ retry_private: goto retry; } ret = -EWOULDBLOCK; + + /* Only actually queue if *uaddr contained val. */ if (unlikely(uval != val)) { queue_unlock(&q, hb); goto out_put_key; } - /* Only actually queue if *uaddr contained val. */ - queue_me(&q, hb); - - /* - * There might have been scheduling since the queue_me(), as we - * cannot hold a spinlock across the get_user() in case it - * faults, and we cannot just set TASK_INTERRUPTIBLE state when - * queueing ourselves into the futex hash. This code thus has to - * rely on the futex_wake() code removing us from hash when it - * wakes us up. - */ - - /* add_wait_queue is the barrier after __set_current_state. */ - __set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&q.waiter, &wait); - /* - * !plist_node_empty() is safe here without any lock. - * q.lock_ptr != 0 is not safe, because of ordering against wakeup. - */ - if (likely(!plist_node_empty(&q.list))) { - if (!abs_time) - schedule(); - else { - hrtimer_init_on_stack(&t.timer, - clockrt ? CLOCK_REALTIME : - CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); - hrtimer_init_sleeper(&t, current); - hrtimer_set_expires_range_ns(&t.timer, *abs_time, - current->timer_slack_ns); - - hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&t.timer)) - t.task = NULL; - - /* - * the timer could have already expired, in which - * case current would be flagged for rescheduling. - * Don't bother calling schedule. - */ - if (likely(t.task)) - schedule(); - - hrtimer_cancel(&t.timer); - - /* Flag if a timeout occured */ - rem = (t.task == NULL); - - destroy_hrtimer_on_stack(&t.timer); - } - } - __set_current_state(TASK_RUNNING); - - /* - * NOTE: we don't remove ourselves from the waitqueue because - * we are the only user of it. - */ + /* queue_me and wait for wakeup, timeout, or a signal. */ + futex_wait_queue_me(hb, &q, to, &wait); /* If we were woken (and unqueued), we succeeded, whatever. */ ret = 0; if (!unqueue_me(&q)) goto out_put_key; ret = -ETIMEDOUT; - if (rem) + if (to && !to->task) goto out_put_key; /* @@ -1275,6 +1285,10 @@ retry_private: out_put_key: put_futex_key(fshared, &q.key); out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } return ret; } -- cgit v0.10.2 From 4b1c486b3587d2abf50bee4a05eb488cd4045f2c Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:39:42 -0700 Subject: futex: add helper to find the top prio waiter of a futex Improve legibility by wrapping finding the top waiter in a function. This will be used by the follow-on patches for enabling requeue pi. Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index ebb48d6..421fb5e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -276,6 +276,25 @@ void put_futex_key(int fshared, union futex_key *key) drop_futex_key_refs(key); } +/** + * futex_top_waiter() - Return the highest priority waiter on a futex + * @hb: the hash bucket the futex_q's reside in + * @key: the futex key (to distinguish it from other futex futex_q's) + * + * Must be called with the hb lock held. + */ +static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, + union futex_key *key) +{ + struct futex_q *this; + + plist_for_each_entry(this, &hb->chain, list) { + if (match_futex(&this->key, key)) + return this; + } + return NULL; +} + static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) { u32 curval; -- cgit v0.10.2 From 1a52084d0919c2799258737c21fb328a9de159b5 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:39:52 -0700 Subject: futex: split out atomic logic from futex_lock_pi() Refactor the atomic portion of futex_lock_pi() into futex_lock_pi_atomic(). This logic will be needed by requeue_pi, so modularize it to reduce code duplication. The only significant change is passing of the task to try and take the lock for. This simplifies the -EDEADLK test as if the lock is owned by task t, it's a deadlock, regardless of if we are doing requeue pi or not. This patch updates the corresponding comment accordingly. Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 421fb5e..986b16e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -556,6 +556,127 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return 0; } +/** + * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex + * @uaddr: the pi futex user address + * @hb: the pi futex hash bucket + * @key: the futex key associated with uaddr and hb + * @ps: the pi_state pointer where we store the result of the lookup + * @task: the task to perform the atomic lock work for. This will be + * "current" except in the case of requeue pi. + * + * Returns: + * 0 - ready to wait + * 1 - acquired the lock + * <0 - error + * + * The hb->lock and futex_key refs shall be held by the caller. + */ +static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, + union futex_key *key, + struct futex_pi_state **ps, + struct task_struct *task) +{ + int lock_taken, ret, ownerdied = 0; + u32 uval, newval, curval; + +retry: + ret = lock_taken = 0; + + /* + * To avoid races, we attempt to take the lock here again + * (by doing a 0 -> TID atomic cmpxchg), while holding all + * the locks. It will most likely not succeed. + */ + newval = task_pid_vnr(task); + + curval = cmpxchg_futex_value_locked(uaddr, 0, newval); + + if (unlikely(curval == -EFAULT)) + return -EFAULT; + + /* + * Detect deadlocks. + */ + if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) + return -EDEADLK; + + /* + * Surprise - we got the lock. Just return to userspace: + */ + if (unlikely(!curval)) + return 1; + + uval = curval; + + /* + * Set the FUTEX_WAITERS flag, so the owner will know it has someone + * to wake at the next unlock. + */ + newval = curval | FUTEX_WAITERS; + + /* + * There are two cases, where a futex might have no owner (the + * owner TID is 0): OWNER_DIED. We take over the futex in this + * case. We also do an unconditional take over, when the owner + * of the futex died. + * + * This is safe as we are protected by the hash bucket lock ! + */ + if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { + /* Keep the OWNER_DIED bit */ + newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); + ownerdied = 0; + lock_taken = 1; + } + + curval = cmpxchg_futex_value_locked(uaddr, uval, newval); + + if (unlikely(curval == -EFAULT)) + return -EFAULT; + if (unlikely(curval != uval)) + goto retry; + + /* + * We took the lock due to owner died take over. + */ + if (unlikely(lock_taken)) + return 1; + + /* + * We dont have the lock. Look up the PI state (or create it if + * we are the first waiter): + */ + ret = lookup_pi_state(uval, hb, key, ps); + + if (unlikely(ret)) { + switch (ret) { + case -ESRCH: + /* + * No owner found for this futex. Check if the + * OWNER_DIED bit is set to figure out whether + * this is a robust futex or not. + */ + if (get_futex_value_locked(&curval, uaddr)) + return -EFAULT; + + /* + * We simply start over in case of a robust + * futex. The code above will take the futex + * and return happy. + */ + if (curval & FUTEX_OWNER_DIED) { + ownerdied = 1; + goto retry; + } + default: + break; + } + } + + return ret; +} + /* * The hash bucket lock must be held when this is called. * Afterwards, the futex_q must not be accessed. @@ -1340,9 +1461,9 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, struct hrtimer_sleeper timeout, *to = NULL; struct task_struct *curr = current; struct futex_hash_bucket *hb; - u32 uval, newval, curval; + u32 uval; struct futex_q q; - int ret, lock_taken, ownerdied = 0; + int ret; if (refill_pi_state_cache()) return -ENOMEM; @@ -1365,81 +1486,15 @@ retry: retry_private: hb = queue_lock(&q); -retry_locked: - ret = lock_taken = 0; - - /* - * To avoid races, we attempt to take the lock here again - * (by doing a 0 -> TID atomic cmpxchg), while holding all - * the locks. It will most likely not succeed. - */ - newval = task_pid_vnr(current); - - curval = cmpxchg_futex_value_locked(uaddr, 0, newval); - - if (unlikely(curval == -EFAULT)) - goto uaddr_faulted; - - /* - * Detect deadlocks. In case of REQUEUE_PI this is a valid - * situation and we return success to user space. - */ - if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { - ret = -EDEADLK; - goto out_unlock_put_key; - } - - /* - * Surprise - we got the lock. Just return to userspace: - */ - if (unlikely(!curval)) - goto out_unlock_put_key; - - uval = curval; - - /* - * Set the WAITERS flag, so the owner will know it has someone - * to wake at next unlock - */ - newval = curval | FUTEX_WAITERS; - - /* - * There are two cases, where a futex might have no owner (the - * owner TID is 0): OWNER_DIED. We take over the futex in this - * case. We also do an unconditional take over, when the owner - * of the futex died. - * - * This is safe as we are protected by the hash bucket lock ! - */ - if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { - /* Keep the OWNER_DIED bit */ - newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current); - ownerdied = 0; - lock_taken = 1; - } - - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); - - if (unlikely(curval == -EFAULT)) - goto uaddr_faulted; - if (unlikely(curval != uval)) - goto retry_locked; - - /* - * We took the lock due to owner died take over. - */ - if (unlikely(lock_taken)) - goto out_unlock_put_key; - - /* - * We dont have the lock. Look up the PI state (or create it if - * we are the first waiter): - */ - ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state); - + ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current); if (unlikely(ret)) { switch (ret) { - + case 1: + /* We got the lock. */ + ret = 0; + goto out_unlock_put_key; + case -EFAULT: + goto uaddr_faulted; case -EAGAIN: /* * Task is exiting and we just wait for the @@ -1449,25 +1504,6 @@ retry_locked: put_futex_key(fshared, &q.key); cond_resched(); goto retry; - - case -ESRCH: - /* - * No owner found for this futex. Check if the - * OWNER_DIED bit is set to figure out whether - * this is a robust futex or not. - */ - if (get_futex_value_locked(&curval, uaddr)) - goto uaddr_faulted; - - /* - * We simply start over in case of a robust - * futex. The code above will take the futex - * and return happy. - */ - if (curval & FUTEX_OWNER_DIED) { - ownerdied = 1; - goto retry_locked; - } default: goto out_unlock_put_key; } -- cgit v0.10.2 From dd9739980b50c8cde33e1f8eb08b7e0140bcd61e Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:02 -0700 Subject: futex: split out fixup owner logic from futex_lock_pi() Refactor the post lock acquisition logic from futex_lock_pi(). This code will be reused in futex_wait_requeue_pi(). Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 986b16e..af831fb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1256,6 +1256,79 @@ handle_fault: static long futex_wait_restart(struct restart_block *restart); /** + * fixup_owner() - Post lock pi_state and corner case management + * @uaddr: user address of the futex + * @fshared: whether the futex is shared (1) or not (0) + * @q: futex_q (contains pi_state and access to the rt_mutex) + * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) + * + * After attempting to lock an rt_mutex, this function is called to cleanup + * the pi_state owner as well as handle race conditions that may allow us to + * acquire the lock. Must be called with the hb lock held. + * + * Returns: + * 1 - success, lock taken + * 0 - success, lock not taken + * <0 - on error (-EFAULT) + */ +static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, + int locked) +{ + struct task_struct *owner; + int ret = 0; + + if (locked) { + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case: + */ + if (q->pi_state->owner != current) + ret = fixup_pi_state_owner(uaddr, q, current, fshared); + goto out; + } + + /* + * Catch the rare case, where the lock was released when we were on the + * way back before we locked the hash bucket. + */ + if (q->pi_state->owner == current) { + /* + * Try to get the rt_mutex now. This might fail as some other + * task acquired the rt_mutex after we removed ourself from the + * rt_mutex waiters list. + */ + if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { + locked = 1; + goto out; + } + + /* + * pi_state is incorrect, some other task did a lock steal and + * we returned due to timeout or signal without taking the + * rt_mutex. Too late. We can access the rt_mutex_owner without + * locking, as the other task is now blocked on the hash bucket + * lock. Fix the state up. + */ + owner = rt_mutex_owner(&q->pi_state->pi_mutex); + ret = fixup_pi_state_owner(uaddr, q, owner, fshared); + goto out; + } + + /* + * Paranoia check. If we did not take the lock, then we should not be + * the owner, nor the pending owner, of the rt_mutex. + */ + if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) + printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " + "pi-state %p\n", ret, + q->pi_state->pi_mutex.owner, + q->pi_state->owner); + +out: + return ret ? ret : locked; +} + +/** * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal * @hb: the futex hash bucket, must be locked by the caller * @q: the futex_q to queue up on @@ -1459,11 +1532,10 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, int detect, ktime_t *time, int trylock) { struct hrtimer_sleeper timeout, *to = NULL; - struct task_struct *curr = current; struct futex_hash_bucket *hb; u32 uval; struct futex_q q; - int ret; + int res, ret; if (refill_pi_state_cache()) return -ENOMEM; @@ -1527,71 +1599,21 @@ retry_private: } spin_lock(q.lock_ptr); - - if (!ret) { - /* - * Got the lock. We might not be the anticipated owner - * if we did a lock-steal - fix up the PI-state in - * that case: - */ - if (q.pi_state->owner != curr) - ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); - } else { - /* - * Catch the rare case, where the lock was released - * when we were on the way back before we locked the - * hash bucket. - */ - if (q.pi_state->owner == curr) { - /* - * Try to get the rt_mutex now. This might - * fail as some other task acquired the - * rt_mutex after we removed ourself from the - * rt_mutex waiters list. - */ - if (rt_mutex_trylock(&q.pi_state->pi_mutex)) - ret = 0; - else { - /* - * pi_state is incorrect, some other - * task did a lock steal and we - * returned due to timeout or signal - * without taking the rt_mutex. Too - * late. We can access the - * rt_mutex_owner without locking, as - * the other task is now blocked on - * the hash bucket lock. Fix the state - * up. - */ - struct task_struct *owner; - int res; - - owner = rt_mutex_owner(&q.pi_state->pi_mutex); - res = fixup_pi_state_owner(uaddr, &q, owner, - fshared); - - /* propagate -EFAULT, if the fixup failed */ - if (res) - ret = res; - } - } else { - /* - * Paranoia check. If we did not take the lock - * in the trylock above, then we should not be - * the owner of the rtmutex, neither the real - * nor the pending one: - */ - if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr) - printk(KERN_ERR "futex_lock_pi: ret = %d " - "pi-mutex: %p pi-state %p\n", ret, - q.pi_state->pi_mutex.owner, - q.pi_state->owner); - } - } + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. + */ + res = fixup_owner(uaddr, fshared, &q, !ret); + /* + * If fixup_owner() returned an error, proprogate that. If it acquired + * the lock, clear our -ETIMEDOUT or -EINTR. + */ + if (res) + ret = (res < 0) ? res : 0; /* - * If fixup_pi_state_owner() faulted and was unable to handle the - * fault, unlock it and return the fault to userspace. + * If fixup_owner() faulted and was unable to handle the fault, unlock + * it and return the fault to userspace. */ if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) rt_mutex_unlock(&q.pi_state->pi_mutex); @@ -1599,9 +1621,7 @@ retry_private: /* Unqueue and drop the lock */ unqueue_me_pi(&q); - if (to) - destroy_hrtimer_on_stack(&to->timer); - return ret != -EINTR ? ret : -ERESTARTNOINTR; + goto out; out_unlock_put_key: queue_unlock(&q, hb); @@ -1611,7 +1631,7 @@ out_put_key: out: if (to) destroy_hrtimer_on_stack(&to->timer); - return ret; + return ret != -EINTR ? ret : -ERESTARTNOINTR; uaddr_faulted: /* -- cgit v0.10.2 From 8dac456a681bd94272ff50ecb31be6b669382c2b Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:12 -0700 Subject: rt_mutex: add proxy lock routines This patch is a prerequisite for futex requeue_pi. It basically splits rt_mutex_slowlock() right down the middle, just before the first call to schedule(). It further adds helper functions which make use of the split and provide the rt-mutex preliminaries for futex requeue_pi. Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 69d9cb9..fec77e7 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * assigned pending owner [which might not have taken the * lock yet]: */ -static inline int try_to_steal_lock(struct rt_mutex *lock) +static inline int try_to_steal_lock(struct rt_mutex *lock, + struct task_struct *task) { struct task_struct *pendowner = rt_mutex_owner(lock); struct rt_mutex_waiter *next; @@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) if (!rt_mutex_owner_pending(lock)) return 0; - if (pendowner == current) + if (pendowner == task) return 1; spin_lock_irqsave(&pendowner->pi_lock, flags); - if (current->prio >= pendowner->prio) { + if (task->prio >= pendowner->prio) { spin_unlock_irqrestore(&pendowner->pi_lock, flags); return 0; } @@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock) * We are going to steal the lock and a waiter was * enqueued on the pending owners pi_waiters queue. So * we have to enqueue this waiter into - * current->pi_waiters list. This covers the case, - * where current is boosted because it holds another + * task->pi_waiters list. This covers the case, + * where task is boosted because it holds another * lock and gets unboosted because the booster is * interrupted, so we would delay a waiter with higher - * priority as current->normal_prio. + * priority as task->normal_prio. * * Note: in the rare case of a SCHED_OTHER task changing * its priority and thus stealing the lock, next->task - * might be current: + * might be task: */ - if (likely(next->task != current)) { - spin_lock_irqsave(¤t->pi_lock, flags); - plist_add(&next->pi_list_entry, ¤t->pi_waiters); - __rt_mutex_adjust_prio(current); - spin_unlock_irqrestore(¤t->pi_lock, flags); + if (likely(next->task != task)) { + spin_lock_irqsave(&task->pi_lock, flags); + plist_add(&next->pi_list_entry, &task->pi_waiters); + __rt_mutex_adjust_prio(task); + spin_unlock_irqrestore(&task->pi_lock, flags); } return 1; } @@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ mark_rt_mutex_waiters(lock); - if (rt_mutex_owner(lock) && !try_to_steal_lock(lock)) + if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current)) return 0; /* We got the lock. */ @@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock) */ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, + struct task_struct *task, int detect_deadlock) { struct task_struct *owner = rt_mutex_owner(lock); @@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, unsigned long flags; int chain_walk = 0, res; - spin_lock_irqsave(¤t->pi_lock, flags); - __rt_mutex_adjust_prio(current); - waiter->task = current; + spin_lock_irqsave(&task->pi_lock, flags); + __rt_mutex_adjust_prio(task); + waiter->task = task; waiter->lock = lock; - plist_node_init(&waiter->list_entry, current->prio); - plist_node_init(&waiter->pi_list_entry, current->prio); + plist_node_init(&waiter->list_entry, task->prio); + plist_node_init(&waiter->pi_list_entry, task->prio); /* Get the top priority waiter on the lock */ if (rt_mutex_has_waiters(lock)) top_waiter = rt_mutex_top_waiter(lock); plist_add(&waiter->list_entry, &lock->wait_list); - current->pi_blocked_on = waiter; + task->pi_blocked_on = waiter; - spin_unlock_irqrestore(¤t->pi_lock, flags); + spin_unlock_irqrestore(&task->pi_lock, flags); if (waiter == rt_mutex_top_waiter(lock)) { spin_lock_irqsave(&owner->pi_lock, flags); @@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, spin_unlock(&lock->wait_lock); res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, - current); + task); spin_lock(&lock->wait_lock); @@ -605,37 +607,25 @@ void rt_mutex_adjust_pi(struct task_struct *task) rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); } -/* - * Slow path lock function: +/** + * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop + * @lock: the rt_mutex to take + * @state: the state the task should block in (TASK_INTERRUPTIBLE + * or TASK_UNINTERRUPTIBLE) + * @timeout: the pre-initialized and started timer, or NULL for none + * @waiter: the pre-initialized rt_mutex_waiter + * @detect_deadlock: passed to task_blocks_on_rt_mutex + * + * lock->wait_lock must be held by the caller. */ static int __sched -rt_mutex_slowlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, - int detect_deadlock) +__rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + struct rt_mutex_waiter *waiter, + int detect_deadlock) { - struct rt_mutex_waiter waiter; int ret = 0; - debug_rt_mutex_init_waiter(&waiter); - waiter.task = NULL; - - spin_lock(&lock->wait_lock); - - /* Try to acquire the lock again: */ - if (try_to_take_rt_mutex(lock)) { - spin_unlock(&lock->wait_lock); - return 0; - } - - set_current_state(state); - - /* Setup the timer, when timeout != NULL */ - if (unlikely(timeout)) { - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); - if (!hrtimer_active(&timeout->timer)) - timeout->task = NULL; - } - for (;;) { /* Try to acquire the lock: */ if (try_to_take_rt_mutex(lock)) @@ -656,19 +646,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, } /* - * waiter.task is NULL the first time we come here and + * waiter->task is NULL the first time we come here and * when we have been woken up by the previous owner * but the lock got stolen by a higher prio task. */ - if (!waiter.task) { - ret = task_blocks_on_rt_mutex(lock, &waiter, + if (!waiter->task) { + ret = task_blocks_on_rt_mutex(lock, waiter, current, detect_deadlock); /* * If we got woken up by the owner then start loop * all over without going into schedule to try * to get the lock now: */ - if (unlikely(!waiter.task)) { + if (unlikely(!waiter->task)) { /* * Reset the return value. We might * have returned with -EDEADLK and the @@ -684,15 +674,52 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, spin_unlock(&lock->wait_lock); - debug_rt_mutex_print_deadlock(&waiter); + debug_rt_mutex_print_deadlock(waiter); - if (waiter.task) + if (waiter->task) schedule_rt_mutex(lock); spin_lock(&lock->wait_lock); set_current_state(state); } + return ret; +} + +/* + * Slow path lock function: + */ +static int __sched +rt_mutex_slowlock(struct rt_mutex *lock, int state, + struct hrtimer_sleeper *timeout, + int detect_deadlock) +{ + struct rt_mutex_waiter waiter; + int ret = 0; + + debug_rt_mutex_init_waiter(&waiter); + waiter.task = NULL; + + spin_lock(&lock->wait_lock); + + /* Try to acquire the lock again: */ + if (try_to_take_rt_mutex(lock)) { + spin_unlock(&lock->wait_lock); + return 0; + } + + set_current_state(state); + + /* Setup the timer, when timeout != NULL */ + if (unlikely(timeout)) { + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } + + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, + detect_deadlock); + set_current_state(TASK_RUNNING); if (unlikely(waiter.task)) @@ -986,6 +1013,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, } /** + * rt_mutex_start_proxy_lock() - Start lock acquisition for another task + * @lock: the rt_mutex to take + * @waiter: the pre-initialized rt_mutex_waiter + * @task: the task to prepare + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Returns: + * 0 - task blocked on lock + * 1 - acquired the lock for task, caller should wake it up + * <0 - error + * + * Special API call for FUTEX_REQUEUE_PI support. + */ +int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, int detect_deadlock) +{ + int ret; + + spin_lock(&lock->wait_lock); + + mark_rt_mutex_waiters(lock); + + if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { + /* We got the lock for task. */ + debug_rt_mutex_lock(lock); + + rt_mutex_set_owner(lock, task, 0); + + rt_mutex_deadlock_account_lock(lock, task); + return 1; + } + + ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); + + + if (ret && !waiter->task) { + /* + * Reset the return value. We might have + * returned with -EDEADLK and the owner + * released the lock while we were walking the + * pi chain. Let the waiter sort it out. + */ + ret = 0; + } + spin_unlock(&lock->wait_lock); + + debug_rt_mutex_print_deadlock(waiter); + + return ret; +} + +/** * rt_mutex_next_owner - return the next owner of the lock * * @lock: the rt lock query @@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) return rt_mutex_top_waiter(lock)->task; } + +/** + * rt_mutex_finish_proxy_lock() - Complete lock acquisition + * @lock: the rt_mutex we were woken on + * @to: the timeout, null if none. hrtimer should already have + * been started. + * @waiter: the pre-initialized rt_mutex_waiter + * @detect_deadlock: perform deadlock detection (1) or not (0) + * + * Complete the lock acquisition started our behalf by another thread. + * + * Returns: + * 0 - success + * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * + * Special API call for PI-futex requeue support + */ +int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock) +{ + int ret; + + spin_lock(&lock->wait_lock); + + set_current_state(TASK_INTERRUPTIBLE); + + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, + detect_deadlock); + + set_current_state(TASK_RUNNING); + + if (unlikely(waiter->task)) + remove_waiter(lock, waiter); + + /* + * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might + * have to fix that up. + */ + fixup_rt_mutex_waiters(lock); + + spin_unlock(&lock->wait_lock); + + /* + * Readjust priority, when we did not get the lock. We might have been + * the pending owner and boosted. Since we did not take the lock, the + * PI boost has to go. + */ + if (unlikely(ret)) + rt_mutex_adjust_prio(current); + + return ret; +} diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index e124bf5..97a2f81 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h @@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); +extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, + struct task_struct *task, + int detect_deadlock); +extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *to, + struct rt_mutex_waiter *waiter, + int detect_deadlock); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" -- cgit v0.10.2 From a72188d8a64ebe74722f1cf7ffac41b41ffdba21 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:22 -0700 Subject: futex: add FUTEX_HAS_TIMEOUT flag to restart.futex.flags Currently restart is only used if there is a timeout. The requeue_pi functionality requires restarting to futex_lock_pi() on signal after wakeup in futex_wait_requeue_pi() regardless of if there was a timeout or not. Using 0 for the timeout value is confusing as that could indicate an expired timer. The flag makes this explicit. While the check is not technically needed in futex_wait_restart(), doing so makes the code consistent with and will avoid confusion should the need arise to restart wait without a timeout. Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index af831fb..6b597cf 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1252,6 +1252,7 @@ handle_fault: */ #define FLAGS_SHARED 0x01 #define FLAGS_CLOCKRT 0x02 +#define FLAGS_HAS_TIMEOUT 0x04 static long futex_wait_restart(struct restart_block *restart); @@ -1486,7 +1487,7 @@ retry_private: restart->futex.val = val; restart->futex.time = abs_time->tv64; restart->futex.bitset = bitset; - restart->futex.flags = 0; + restart->futex.flags = FLAGS_HAS_TIMEOUT; if (fshared) restart->futex.flags |= FLAGS_SHARED; @@ -1510,13 +1511,16 @@ static long futex_wait_restart(struct restart_block *restart) { u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; int fshared = 0; - ktime_t t; + ktime_t t, *tp = NULL; - t.tv64 = restart->futex.time; + if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { + t.tv64 = restart->futex.time; + tp = &t; + } restart->fn = do_no_restart_syscall; if (restart->futex.flags & FLAGS_SHARED) fshared = 1; - return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, + return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, restart->futex.bitset, restart->futex.flags & FLAGS_CLOCKRT); } -- cgit v0.10.2 From 9121e4783cd5c7e2a407763f3b61c2d573891133 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:31 -0700 Subject: futex: distangle futex_requeue() futex_requeue() is getting a bit long-winded, and will be getting more so after the requeue_pi patch. Factor out the actual requeueing into a nicely contained inline function to reduce function length and improve legibility. Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 6b597cf..e76942e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -940,6 +940,34 @@ out: return ret; } +/** + * requeue_futex() - Requeue a futex_q from one hb to another + * @q: the futex_q to requeue + * @hb1: the source hash_bucket + * @hb2: the target hash_bucket + * @key2: the new key for the requeued futex_q + */ +static inline +void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, union futex_key *key2) +{ + + /* + * If key1 and key2 hash to the same bucket, no need to + * requeue. + */ + if (likely(&hb1->chain != &hb2->chain)) { + plist_del(&q->list, &hb1->chain); + plist_add(&q->list, &hb2->chain); + q->lock_ptr = &hb2->lock; +#ifdef CONFIG_DEBUG_PI_LIST + q->list.plist.lock = &hb2->lock; +#endif + } + get_futex_key_refs(key2); + q->key = *key2; +} + /* * Requeue all waiters hashed on one physical page to another * physical page. @@ -999,20 +1027,7 @@ retry_private: if (++ret <= nr_wake) { wake_futex(this); } else { - /* - * If key1 and key2 hash to the same bucket, no need to - * requeue. - */ - if (likely(head1 != &hb2->chain)) { - plist_del(&this->list, &hb1->chain); - plist_add(&this->list, &hb2->chain); - this->lock_ptr = &hb2->lock; -#ifdef CONFIG_DEBUG_PI_LIST - this->list.plist.lock = &hb2->lock; -#endif - } - this->key = key2; - get_futex_key_refs(&key2); + requeue_futex(this, hb1, hb2, &key2); drop_count++; if (ret - nr_wake >= nr_requeue) -- cgit v0.10.2 From f801073f87aa22ddf0e9146355fec3993163790f Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:40 -0700 Subject: futex: split out futex value validation code Refactor the code to validate the expected futex value in order to reuse it with the requeue_pi code. Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index e76942e..dbe857a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1398,42 +1398,29 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, __set_current_state(TASK_RUNNING); } -static int futex_wait(u32 __user *uaddr, int fshared, - u32 val, ktime_t *abs_time, u32 bitset, int clockrt) +/** + * futex_wait_setup() - Prepare to wait on a futex + * @uaddr: the futex userspace address + * @val: the expected value + * @fshared: whether the futex is shared (1) or not (0) + * @q: the associated futex_q + * @hb: storage for hash_bucket pointer to be returned to caller + * + * Setup the futex_q and locate the hash_bucket. Get the futex value and + * compare it with the expected value. Handle atomic faults internally. + * Return with the hb lock held and a q.key reference on success, and unlocked + * with no q.key reference on failure. + * + * Returns: + * 0 - uaddr contains val and hb has been locked + * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked + */ +static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, + struct futex_q *q, struct futex_hash_bucket **hb) { - struct hrtimer_sleeper timeout, *to = NULL; - DECLARE_WAITQUEUE(wait, current); - struct restart_block *restart; - struct futex_hash_bucket *hb; - struct futex_q q; u32 uval; int ret; - if (!bitset) - return -EINVAL; - - q.pi_state = NULL; - q.bitset = bitset; - - if (abs_time) { - to = &timeout; - - hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - hrtimer_init_sleeper(to, current); - hrtimer_set_expires_range_ns(&to->timer, *abs_time, - current->timer_slack_ns); - } - -retry: - q.key = FUTEX_KEY_INIT; - ret = get_futex_key(uaddr, fshared, &q.key); - if (unlikely(ret != 0)) - goto out; - -retry_private: - hb = queue_lock(&q); - /* * Access the page AFTER the hash-bucket is locked. * Order is important: @@ -1450,33 +1437,74 @@ retry_private: * A consequence is that futex_wait() can return zero and absorb * a wakeup when *uaddr != val on entry to the syscall. This is * rare, but normal. - * - * For shared futexes, we hold the mmap semaphore, so the mapping - * cannot have changed since we looked it up in get_futex_key. */ +retry: + q->key = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr, fshared, &q->key); + if (unlikely(ret != 0)) + goto out; + +retry_private: + *hb = queue_lock(q); + ret = get_futex_value_locked(&uval, uaddr); - if (unlikely(ret)) { - queue_unlock(&q, hb); + if (ret) { + queue_unlock(q, *hb); ret = get_user(uval, uaddr); if (ret) - goto out_put_key; + goto out; if (!fshared) goto retry_private; - put_futex_key(fshared, &q.key); + put_futex_key(fshared, &q->key); goto retry; } - ret = -EWOULDBLOCK; - /* Only actually queue if *uaddr contained val. */ - if (unlikely(uval != val)) { - queue_unlock(&q, hb); - goto out_put_key; + if (uval != val) { + queue_unlock(q, *hb); + ret = -EWOULDBLOCK; } +out: + if (ret) + put_futex_key(fshared, &q->key); + return ret; +} + +static int futex_wait(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, int clockrt) +{ + struct hrtimer_sleeper timeout, *to = NULL; + DECLARE_WAITQUEUE(wait, current); + struct restart_block *restart; + struct futex_hash_bucket *hb; + struct futex_q q; + int ret; + + if (!bitset) + return -EINVAL; + + q.pi_state = NULL; + q.bitset = bitset; + + if (abs_time) { + to = &timeout; + + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } + + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) + goto out; + /* queue_me and wait for wakeup, timeout, or a signal. */ futex_wait_queue_me(hb, &q, to, &wait); -- cgit v0.10.2 From 52400ba946759af28442dee6265c5c0180ac7122 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 3 Apr 2009 13:40:49 -0700 Subject: futex: add requeue_pi functionality PI Futexes and their underlying rt_mutex cannot be left ownerless if there are pending waiters as this will break the PI boosting logic, so the standard requeue commands aren't sufficient. The new commands properly manage pi futex ownership by ensuring a futex with waiters has an owner at all times. This will allow glibc to properly handle pi mutexes with pthread_condvars. The approach taken here is to create two new futex op codes: FUTEX_WAIT_REQUEUE_PI: Tasks will use this op code to wait on a futex (such as a non-pi waitqueue) and wake after they have been requeued to a pi futex. Prior to returning to userspace, they will acquire this pi futex (and the underlying rt_mutex). futex_wait_requeue_pi() is the result of a high speed collision between futex_wait() and futex_lock_pi() (with the first part of futex_lock_pi() being done by futex_proxy_trylock_atomic() on behalf of the top_waiter). FUTEX_REQUEUE_PI (and FUTEX_CMP_REQUEUE_PI): This call must be used to wake tasks waiting with FUTEX_WAIT_REQUEUE_PI, regardless of how many tasks the caller intends to wake or requeue. pthread_cond_broadcast() should call this with nr_wake=1 and nr_requeue=INT_MAX. pthread_cond_signal() should call this with nr_wake=1 and nr_requeue=0. The reason being we need both callers to get the benefit of the futex_proxy_trylock_atomic() routine. futex_requeue() also enqueues the top_waiter on the rt_mutex via rt_mutex_start_proxy_lock(). Signed-off-by: Darren Hart Reviewed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner diff --git a/include/linux/futex.h b/include/linux/futex.h index 3bf5bb5..b05519c 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -23,6 +23,9 @@ union ktime; #define FUTEX_TRYLOCK_PI 8 #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_REQUEUE_PI 12 +#define FUTEX_CMP_REQUEUE_PI 13 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -38,6 +41,11 @@ union ktime; #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) /* * Support for robust futexes: the kernel cleans up held futexes at diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index e6b820f..a8cc4e1 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -21,13 +21,14 @@ struct restart_block { struct { unsigned long arg0, arg1, arg2, arg3; }; - /* For futex_wait */ + /* For futex_wait and futex_wait_requeue_pi */ struct { u32 *uaddr; u32 val; u32 flags; u32 bitset; u64 time; + u32 *uaddr2; } futex; /* For nanosleep */ struct { diff --git a/kernel/futex.c b/kernel/futex.c index dbe857a..185c981 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -19,6 +19,10 @@ * PRIVATE futexes by Eric Dumazet * Copyright (C) 2007 Eric Dumazet * + * Requeue-PI support by Darren Hart + * Copyright (C) IBM Corporation, 2009 + * Thanks to Thomas Gleixner for conceptual design and careful reviews. + * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. @@ -109,6 +113,9 @@ struct futex_q { struct futex_pi_state *pi_state; struct task_struct *task; + /* rt_waiter storage for requeue_pi: */ + struct rt_mutex_waiter *rt_waiter; + /* Bitset for the optional bitmasked wakeup */ u32 bitset; }; @@ -827,7 +834,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) plist_for_each_entry_safe(this, next, head, list) { if (match_futex (&this->key, &key)) { - if (this->pi_state) { + if (this->pi_state || this->rt_waiter) { ret = -EINVAL; break; } @@ -968,20 +975,138 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, q->key = *key2; } -/* - * Requeue all waiters hashed on one physical page to another - * physical page. +/** + * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue + * q: the futex_q + * key: the key of the requeue target futex + * + * During futex_requeue, with requeue_pi=1, it is possible to acquire the + * target futex if it is uncontended or via a lock steal. Set the futex_q key + * to the requeue target futex so the waiter can detect the wakeup on the right + * futex, but remove it from the hb and NULL the rt_waiter so it can detect + * atomic lock acquisition. Must be called with the q->lock_ptr held. + */ +static inline +void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) +{ + drop_futex_key_refs(&q->key); + get_futex_key_refs(key); + q->key = *key; + + WARN_ON(plist_node_empty(&q->list)); + plist_del(&q->list, &q->list.plist); + + WARN_ON(!q->rt_waiter); + q->rt_waiter = NULL; + + wake_up(&q->waiter); +} + +/** + * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter + * @pifutex: the user address of the to futex + * @hb1: the from futex hash bucket, must be locked by the caller + * @hb2: the to futex hash bucket, must be locked by the caller + * @key1: the from futex key + * @key2: the to futex key + * + * Try and get the lock on behalf of the top waiter if we can do it atomically. + * Wake the top waiter if we succeed. hb1 and hb2 must be held by the caller. + * + * Returns: + * 0 - failed to acquire the lock atomicly + * 1 - acquired the lock + * <0 - error + */ +static int futex_proxy_trylock_atomic(u32 __user *pifutex, + struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, + union futex_key *key1, union futex_key *key2, + struct futex_pi_state **ps) +{ + struct futex_q *top_waiter; + u32 curval; + int ret; + + if (get_futex_value_locked(&curval, pifutex)) + return -EFAULT; + + top_waiter = futex_top_waiter(hb1, key1); + + /* There are no waiters, nothing for us to do. */ + if (!top_waiter) + return 0; + + /* + * Either take the lock for top_waiter or set the FUTEX_WAITERS bit. + * The pi_state is returned in ps in contended cases. + */ + ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task); + if (ret == 1) + requeue_pi_wake_futex(top_waiter, key2); + + return ret; +} + +/** + * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 + * uaddr1: source futex user address + * uaddr2: target futex user address + * nr_wake: number of waiters to wake (must be 1 for requeue_pi) + * nr_requeue: number of waiters to requeue (0-INT_MAX) + * requeue_pi: if we are attempting to requeue from a non-pi futex to a + * pi futex (pi to pi requeue is not supported) + * + * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire + * uaddr2 atomically on behalf of the top waiter. + * + * Returns: + * >=0 - on success, the number of tasks requeued or woken + * <0 - on error */ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, - int nr_wake, int nr_requeue, u32 *cmpval) + int nr_wake, int nr_requeue, u32 *cmpval, + int requeue_pi) { union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; + int drop_count = 0, task_count = 0, ret; + struct futex_pi_state *pi_state = NULL; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; - int ret, drop_count = 0; + u32 curval2; + + if (requeue_pi) { + /* + * requeue_pi requires a pi_state, try to allocate it now + * without any locks in case it fails. + */ + if (refill_pi_state_cache()) + return -ENOMEM; + /* + * requeue_pi must wake as many tasks as it can, up to nr_wake + * + nr_requeue, since it acquires the rt_mutex prior to + * returning to userspace, so as to not leave the rt_mutex with + * waiters and no owner. However, second and third wake-ups + * cannot be predicted as they involve race conditions with the + * first wake and a fault while looking up the pi_state. Both + * pthread_cond_signal() and pthread_cond_broadcast() should + * use nr_wake=1. + */ + if (nr_wake != 1) + return -EINVAL; + } retry: + if (pi_state != NULL) { + /* + * We will have to lookup the pi_state again, so free this one + * to keep the accounting correct. + */ + free_pi_state(pi_state); + pi_state = NULL; + } + ret = get_futex_key(uaddr1, fshared, &key1); if (unlikely(ret != 0)) goto out; @@ -1020,19 +1145,94 @@ retry_private: } } + if (requeue_pi && (task_count - nr_wake < nr_requeue)) { + /* Attempt to acquire uaddr2 and wake the top_waiter. */ + ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, + &key2, &pi_state); + + /* + * At this point the top_waiter has either taken uaddr2 or is + * waiting on it. If the former, then the pi_state will not + * exist yet, look it up one more time to ensure we have a + * reference to it. + */ + if (ret == 1) { + WARN_ON(pi_state); + task_count++; + ret = get_futex_value_locked(&curval2, uaddr2); + if (!ret) + ret = lookup_pi_state(curval2, hb2, &key2, + &pi_state); + } + + switch (ret) { + case 0: + break; + case -EFAULT: + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + ret = get_user(curval2, uaddr2); + if (!ret) + goto retry; + goto out; + case -EAGAIN: + /* The owner was exiting, try again. */ + double_unlock_hb(hb1, hb2); + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); + cond_resched(); + goto retry; + default: + goto out_unlock; + } + } + head1 = &hb1->chain; plist_for_each_entry_safe(this, next, head1, list) { - if (!match_futex (&this->key, &key1)) + if (task_count - nr_wake >= nr_requeue) + break; + + if (!match_futex(&this->key, &key1)) continue; - if (++ret <= nr_wake) { + + WARN_ON(!requeue_pi && this->rt_waiter); + WARN_ON(requeue_pi && !this->rt_waiter); + + /* + * Wake nr_wake waiters. For requeue_pi, if we acquired the + * lock, we already woke the top_waiter. If not, it will be + * woken by futex_unlock_pi(). + */ + if (++task_count <= nr_wake && !requeue_pi) { wake_futex(this); - } else { - requeue_futex(this, hb1, hb2, &key2); - drop_count++; + continue; + } - if (ret - nr_wake >= nr_requeue) - break; + /* + * Requeue nr_requeue waiters and possibly one more in the case + * of requeue_pi if we couldn't acquire the lock atomically. + */ + if (requeue_pi) { + /* Prepare the waiter to take the rt_mutex. */ + atomic_inc(&pi_state->refcount); + this->pi_state = pi_state; + ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, + this->rt_waiter, + this->task, 1); + if (ret == 1) { + /* We got the lock. */ + requeue_pi_wake_futex(this, &key2); + continue; + } else if (ret) { + /* -EDEADLK */ + this->pi_state = NULL; + free_pi_state(pi_state); + goto out_unlock; + } } + requeue_futex(this, hb1, hb2, &key2); + drop_count++; } out_unlock: @@ -1047,7 +1247,9 @@ out_put_keys: out_put_key1: put_futex_key(fshared, &key1); out: - return ret; + if (pi_state != NULL) + free_pi_state(pi_state); + return ret ? ret : task_count; } /* The key must be already stored in q->key. */ @@ -1270,6 +1472,7 @@ handle_fault: #define FLAGS_HAS_TIMEOUT 0x04 static long futex_wait_restart(struct restart_block *restart); +static long futex_lock_pi_restart(struct restart_block *restart); /** * fixup_owner() - Post lock pi_state and corner case management @@ -1489,6 +1692,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, q.pi_state = NULL; q.bitset = bitset; + q.rt_waiter = NULL; if (abs_time) { to = &timeout; @@ -1596,6 +1800,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared, } q.pi_state = NULL; + q.rt_waiter = NULL; retry: q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); @@ -1701,6 +1906,20 @@ uaddr_faulted: goto retry; } +static long futex_lock_pi_restart(struct restart_block *restart) +{ + u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; + ktime_t t, *tp = NULL; + int fshared = restart->futex.flags & FLAGS_SHARED; + + if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { + t.tv64 = restart->futex.time; + tp = &t; + } + restart->fn = do_no_restart_syscall; + + return (long)futex_lock_pi(uaddr, fshared, restart->futex.val, tp, 0); +} /* * Userspace attempted a TID -> 0 atomic transition, and failed. @@ -1803,6 +2022,253 @@ pi_faulted: return ret; } +/** + * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex + * @hb: the hash_bucket futex_q was original enqueued on + * @q: the futex_q woken while waiting to be requeued + * @key2: the futex_key of the requeue target futex + * @timeout: the timeout associated with the wait (NULL if none) + * + * Detect if the task was woken on the initial futex as opposed to the requeue + * target futex. If so, determine if it was a timeout or a signal that caused + * the wakeup and return the appropriate error code to the caller. Must be + * called with the hb lock held. + * + * Returns + * 0 - no early wakeup detected + * <0 - -ETIMEDOUT or -ERESTARTSYS (FIXME: or ERESTARTNOINTR?) + */ +static inline +int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, + struct futex_q *q, union futex_key *key2, + struct hrtimer_sleeper *timeout) +{ + int ret = 0; + + /* + * With the hb lock held, we avoid races while we process the wakeup. + * We only need to hold hb (and not hb2) to ensure atomicity as the + * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. + * It can't be requeued from uaddr2 to something else since we don't + * support a PI aware source futex for requeue. + */ + if (!match_futex(&q->key, key2)) { + WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); + /* + * We were woken prior to requeue by a timeout or a signal. + * Unqueue the futex_q and determine which it was. + */ + plist_del(&q->list, &q->list.plist); + drop_futex_key_refs(&q->key); + + if (timeout && !timeout->task) + ret = -ETIMEDOUT; + else { + /* + * We expect signal_pending(current), but another + * thread may have handled it for us already. + */ + /* FIXME: ERESTARTSYS or ERESTARTNOINTR? Do we care if + * the user specified SA_RESTART or not? */ + ret = -ERESTARTSYS; + } + } + return ret; +} + +/** + * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 + * @uaddr: the futex we initialyl wait on (non-pi) + * @fshared: whether the futexes are shared (1) or not (0). They must be + * the same type, no requeueing from private to shared, etc. + * @val: the expected value of uaddr + * @abs_time: absolute timeout + * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. + * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) + * @uaddr2: the pi futex we will take prior to returning to user-space + * + * The caller will wait on uaddr and will be requeued by futex_requeue() to + * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and + * complete the acquisition of the rt_mutex prior to returning to userspace. + * This ensures the rt_mutex maintains an owner when it has waiters; without + * one, the pi logic wouldn't know which task to boost/deboost, if there was a + * need to. + * + * We call schedule in futex_wait_queue_me() when we enqueue and return there + * via the following: + * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() + * 2) wakeup on uaddr2 after a requeue and subsequent unlock + * 3) signal (before or after requeue) + * 4) timeout (before or after requeue) + * + * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. + * + * If 2, we may then block on trying to take the rt_mutex and return via: + * 5) successful lock + * 6) signal + * 7) timeout + * 8) other lock acquisition failure + * + * If 6, we setup a restart_block with futex_lock_pi() as the function. + * + * If 4 or 7, we cleanup and return with -ETIMEDOUT. + * + * Returns: + * 0 - On success + * <0 - On error + */ +static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, + u32 val, ktime_t *abs_time, u32 bitset, + int clockrt, u32 __user *uaddr2) +{ + struct hrtimer_sleeper timeout, *to = NULL; + struct rt_mutex_waiter rt_waiter; + struct rt_mutex *pi_mutex = NULL; + DECLARE_WAITQUEUE(wait, current); + struct restart_block *restart; + struct futex_hash_bucket *hb; + union futex_key key2; + struct futex_q q; + int res, ret; + u32 uval; + + if (!bitset) + return -EINVAL; + + if (abs_time) { + to = &timeout; + hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init_sleeper(to, current); + hrtimer_set_expires_range_ns(&to->timer, *abs_time, + current->timer_slack_ns); + } + + /* + * The waiter is allocated on our stack, manipulated by the requeue + * code while we sleep on uaddr. + */ + debug_rt_mutex_init_waiter(&rt_waiter); + rt_waiter.task = NULL; + + q.pi_state = NULL; + q.bitset = bitset; + q.rt_waiter = &rt_waiter; + + key2 = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr2, fshared, &key2); + if (unlikely(ret != 0)) + goto out; + + /* Prepare to wait on uaddr. */ + ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); + if (ret) { + put_futex_key(fshared, &key2); + goto out; + } + + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ + futex_wait_queue_me(hb, &q, to, &wait); + + spin_lock(&hb->lock); + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); + spin_unlock(&hb->lock); + if (ret) + goto out_put_keys; + + /* + * In order for us to be here, we know our q.key == key2, and since + * we took the hb->lock above, we also know that futex_requeue() has + * completed and we no longer have to concern ourselves with a wakeup + * race with the atomic proxy lock acquition by the requeue code. + */ + + /* Check if the requeue code acquired the second futex for us. */ + if (!q.rt_waiter) { + /* + * Got the lock. We might not be the anticipated owner if we + * did a lock-steal - fix up the PI-state in that case. + */ + if (q.pi_state && (q.pi_state->owner != current)) { + spin_lock(q.lock_ptr); + ret = fixup_pi_state_owner(uaddr2, &q, current, + fshared); + spin_unlock(q.lock_ptr); + } + } else { + /* + * We have been woken up by futex_unlock_pi(), a timeout, or a + * signal. futex_unlock_pi() will not destroy the lock_ptr nor + * the pi_state. + */ + WARN_ON(!&q.pi_state); + pi_mutex = &q.pi_state->pi_mutex; + ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); + debug_rt_mutex_free_waiter(&rt_waiter); + + spin_lock(q.lock_ptr); + /* + * Fixup the pi_state owner and possibly acquire the lock if we + * haven't already. + */ + res = fixup_owner(uaddr2, fshared, &q, !ret); + /* + * If fixup_owner() returned an error, proprogate that. If it + * acquired the lock, clear our -ETIMEDOUT or -EINTR. + */ + if (res) + ret = (res < 0) ? res : 0; + + /* Unqueue and drop the lock. */ + unqueue_me_pi(&q); + } + + /* + * If fixup_pi_state_owner() faulted and was unable to handle the + * fault, unlock the rt_mutex and return the fault to userspace. + */ + if (ret == -EFAULT) { + if (rt_mutex_owner(pi_mutex) == current) + rt_mutex_unlock(pi_mutex); + } else if (ret == -EINTR) { + ret = -EFAULT; + if (get_user(uval, uaddr2)) + goto out_put_keys; + + /* + * We've already been requeued, so restart by calling + * futex_lock_pi() directly, rather then returning to this + * function. + */ + ret = -ERESTART_RESTARTBLOCK; + restart = ¤t_thread_info()->restart_block; + restart->fn = futex_lock_pi_restart; + restart->futex.uaddr = (u32 *)uaddr2; + restart->futex.val = uval; + restart->futex.flags = 0; + if (abs_time) { + restart->futex.flags |= FLAGS_HAS_TIMEOUT; + restart->futex.time = abs_time->tv64; + } + + if (fshared) + restart->futex.flags |= FLAGS_SHARED; + if (clockrt) + restart->futex.flags |= FLAGS_CLOCKRT; + } + +out_put_keys: + put_futex_key(fshared, &q.key); + put_futex_key(fshared, &key2); + +out: + if (to) { + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); + } + return ret; +} + /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. @@ -2025,7 +2491,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, fshared = 1; clockrt = op & FUTEX_CLOCK_REALTIME; - if (clockrt && cmd != FUTEX_WAIT_BITSET) + if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) return -ENOSYS; switch (cmd) { @@ -2040,10 +2506,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wake(uaddr, fshared, val, val3); break; case FUTEX_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); break; case FUTEX_CMP_REQUEUE: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 0); break; case FUTEX_WAKE_OP: ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); @@ -2060,6 +2527,18 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, if (futex_cmpxchg_enabled) ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); break; + case FUTEX_WAIT_REQUEUE_PI: + val3 = FUTEX_BITSET_MATCH_ANY; + ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, + clockrt, uaddr2); + break; + case FUTEX_REQUEUE_PI: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1); + break; + case FUTEX_CMP_REQUEUE_PI: + ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, + 1); + break; default: ret = -ENOSYS; } @@ -2077,7 +2556,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, int cmd = op & FUTEX_CMD_MASK; if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET)) { + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { if (copy_from_user(&ts, utime, sizeof(ts)) != 0) return -EFAULT; if (!timespec_valid(&ts)) @@ -2089,10 +2569,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, tp = &t; } /* - * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. + * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || + cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; -- cgit v0.10.2 From 80fbe6ac9b47cbc11e174a9bf853834dc281da35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Mon, 6 Apr 2009 11:50:22 +0200 Subject: ASoC: correct s6000 I2S clock polarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the data sheet data is clocked out on the falling edge and latched on the rising edge of the bit clock. While the left sample is transmitted the word clock line is low. Signed-off-by: Daniel Glöckner Signed-off-by: Mark Brown diff --git a/sound/soc/s6000/s6000-i2s.c b/sound/soc/s6000/s6000-i2s.c index dcc7904..c5cda18 100644 --- a/sound/soc/s6000/s6000-i2s.c +++ b/sound/soc/s6000/s6000-i2s.c @@ -252,10 +252,10 @@ static int s6000_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, } switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_IB_IF: + case SND_SOC_DAIFMT_NB_NF: w |= S6_I2S_LEFT_FIRST; break; - case SND_SOC_DAIFMT_IB_NF: + case SND_SOC_DAIFMT_NB_IF: w |= S6_I2S_RIGHT_FIRST; break; default: diff --git a/sound/soc/s6000/s6105-ipcam.c b/sound/soc/s6000/s6105-ipcam.c index 21c4f55..b5f95f9 100644 --- a/sound/soc/s6000/s6105-ipcam.c +++ b/sound/soc/s6000/s6105-ipcam.c @@ -43,7 +43,7 @@ static int s6105_hw_params(struct snd_pcm_substream *substream, /* set cpu DAI configuration */ ret = snd_soc_dai_set_fmt(cpu_dai, SND_SOC_DAIFMT_CBM_CFM | - SND_SOC_DAIFMT_IB_IF); + SND_SOC_DAIFMT_NB_NF); if (ret < 0) return ret; -- cgit v0.10.2 From 7ba5779533819fc061b4afafcb4a609d55f37057 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 6 Apr 2009 20:49:14 +0900 Subject: tomoyo: remove "undelete domain" command. Since TOMOYO's policy management tools does not use the "undelete domain" command, we decided to remove that command. Signed-off-by: Kentaro Takeda Signed-off-by: Tetsuo Handa Signed-off-by: Toshiharu Harada Signed-off-by: James Morris diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 92cea65..a0affd9 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -1252,15 +1252,12 @@ static int tomoyo_write_domain_policy(struct tomoyo_io_buffer *head) struct tomoyo_domain_info *domain = head->write_var1; bool is_delete = false; bool is_select = false; - bool is_undelete = false; unsigned int profile; if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE)) is_delete = true; else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_SELECT)) is_select = true; - else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_UNDELETE)) - is_undelete = true; if (is_select && tomoyo_is_select_one(head, data)) return 0; /* Don't allow updating policies by non manager programs. */ @@ -1274,9 +1271,7 @@ static int tomoyo_write_domain_policy(struct tomoyo_io_buffer *head) down_read(&tomoyo_domain_list_lock); domain = tomoyo_find_domain(data); up_read(&tomoyo_domain_list_lock); - } else if (is_undelete) - domain = tomoyo_undelete_domain(data); - else + } else domain = tomoyo_find_or_assign_new_domain(data, 0); head->write_var1 = domain; return 0; diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 26a76d6..e77e6a6 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -88,10 +88,7 @@ struct tomoyo_domain_info { /* Name of this domain. Never NULL. */ const struct tomoyo_path_info *domainname; u8 profile; /* Profile number to use. */ - u8 is_deleted; /* Delete flag. - 0 = active. - 1 = deleted but undeletable. - 255 = deleted and no longer undeletable. */ + bool is_deleted; /* Delete flag. */ bool quota_warned; /* Quota warnning flag. */ /* DOMAIN_FLAGS_*. Use tomoyo_set_domain_flag() to modify. */ u8 flags; @@ -144,7 +141,6 @@ struct tomoyo_double_path_acl_record { #define TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN "no_initialize_domain " #define TOMOYO_KEYWORD_NO_KEEP_DOMAIN "no_keep_domain " #define TOMOYO_KEYWORD_SELECT "select " -#define TOMOYO_KEYWORD_UNDELETE "undelete " #define TOMOYO_KEYWORD_USE_PROFILE "use_profile " #define TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "ignore_global_allow_read" /* A domain definition starts with . */ @@ -267,8 +263,6 @@ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname); struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char * domainname, const u8 profile); -/* Undelete a domain. */ -struct tomoyo_domain_info *tomoyo_undelete_domain(const char *domainname); /* Check mode for specified functionality. */ unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain, const u8 index); diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 093a756..2f2b449 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -551,9 +551,7 @@ int tomoyo_write_alias_policy(char *data, const bool is_delete) return tomoyo_update_alias_entry(data, cp, is_delete); } -/* Domain create/delete/undelete handler. */ - -/* #define TOMOYO_DEBUG_DOMAIN_UNDELETE */ +/* Domain create/delete handler. */ /** * tomoyo_delete_domain - Delete a domain. @@ -571,41 +569,15 @@ int tomoyo_delete_domain(char *domainname) tomoyo_fill_path_info(&name); /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_domain_list_lock); -#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE - printk(KERN_DEBUG "tomoyo_delete_domain %s\n", domainname); - list_for_each_entry(domain, &tomoyo_domain_list, list) { - if (tomoyo_pathcmp(domain->domainname, &name)) - continue; - printk(KERN_DEBUG "List: %p %u\n", domain, domain->is_deleted); - } -#endif /* Is there an active domain? */ list_for_each_entry(domain, &tomoyo_domain_list, list) { - struct tomoyo_domain_info *domain2; /* Never delete tomoyo_kernel_domain */ if (domain == &tomoyo_kernel_domain) continue; if (domain->is_deleted || tomoyo_pathcmp(domain->domainname, &name)) continue; - /* Mark already deleted domains as non undeletable. */ - list_for_each_entry(domain2, &tomoyo_domain_list, list) { - if (!domain2->is_deleted || - tomoyo_pathcmp(domain2->domainname, &name)) - continue; -#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE - if (domain2->is_deleted != 255) - printk(KERN_DEBUG - "Marked %p as non undeletable\n", - domain2); -#endif - domain2->is_deleted = 255; - } - /* Delete and mark active domain as undeletable. */ - domain->is_deleted = 1; -#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE - printk(KERN_DEBUG "Marked %p as undeletable\n", domain); -#endif + domain->is_deleted = true; break; } up_write(&tomoyo_domain_list_lock); @@ -614,58 +586,6 @@ int tomoyo_delete_domain(char *domainname) } /** - * tomoyo_undelete_domain - Undelete a domain. - * - * @domainname: The name of domain. - * - * Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise. - */ -struct tomoyo_domain_info *tomoyo_undelete_domain(const char *domainname) -{ - struct tomoyo_domain_info *domain; - struct tomoyo_domain_info *candidate_domain = NULL; - struct tomoyo_path_info name; - - name.name = domainname; - tomoyo_fill_path_info(&name); - /***** EXCLUSIVE SECTION START *****/ - down_write(&tomoyo_domain_list_lock); -#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE - printk(KERN_DEBUG "tomoyo_undelete_domain %s\n", domainname); - list_for_each_entry(domain, &tomoyo_domain_list, list) { - if (tomoyo_pathcmp(domain->domainname, &name)) - continue; - printk(KERN_DEBUG "List: %p %u\n", domain, domain->is_deleted); - } -#endif - list_for_each_entry(domain, &tomoyo_domain_list, list) { - if (tomoyo_pathcmp(&name, domain->domainname)) - continue; - if (!domain->is_deleted) { - /* This domain is active. I can't undelete. */ - candidate_domain = NULL; -#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE - printk(KERN_DEBUG "%p is active. I can't undelete.\n", - domain); -#endif - break; - } - /* Is this domain undeletable? */ - if (domain->is_deleted == 1) - candidate_domain = domain; - } - if (candidate_domain) { - candidate_domain->is_deleted = 0; -#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE - printk(KERN_DEBUG "%p was undeleted.\n", candidate_domain); -#endif - } - up_write(&tomoyo_domain_list_lock); - /***** EXCLUSIVE SECTION END *****/ - return candidate_domain; -} - -/** * tomoyo_find_or_assign_new_domain - Create a domain. * * @domainname: The name of domain. @@ -711,10 +631,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char * /***** CRITICAL SECTION END *****/ if (flag) continue; -#ifdef TOMOYO_DEBUG_DOMAIN_UNDELETE - printk(KERN_DEBUG "Reusing %p %s\n", domain, - domain->domainname->name); -#endif list_for_each_entry(ptr, &domain->acl_info_list, list) { ptr->type |= TOMOYO_ACL_DELETED; } @@ -722,7 +638,7 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char * domain->profile = profile; domain->quota_warned = false; mb(); /* Avoid out-of-order execution. */ - domain->is_deleted = 0; + domain->is_deleted = false; goto out; } /* No memory reusable. Create using new memory. */ -- cgit v0.10.2 From a2e87d06ddbe6e6fdb8d6d2e5e985efe4efb07dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:44:59 +0200 Subject: perf_counter: update mmap() counter read, take 2 Update the userspace read method. Paul noted that: - userspace cannot observe ->lock & 1 on the same cpu. - we need a barrier() between reading ->lock and ->index to ensure we read them in that prticular order. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.368446033@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f2b914d..e22ab47 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -170,22 +170,18 @@ struct perf_counter_mmap_page { * u32 seq; * s64 count; * - * again: - * seq = pc->lock; - * if (unlikely(seq & 1)) { - * cpu_relax(); - * goto again; - * } + * do { + * seq = pc->lock; * - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; + * barrier() + * if (pc->index) { + * count = pmc_read(pc->index - 1); + * count += pc->offset; + * } else + * goto regular_read; * - * barrier(); - * if (pc->lock != seq) - * goto again; + * barrier(); + * } while (pc->lock != seq); * * NOTE: for obvious reason this only works on self-monitoring * processes. -- cgit v0.10.2 From 9c03d88e328d5f28f13191622c2ea1349c36b799 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:00 +0200 Subject: perf_counter: add more context information Change the callchain context entries to u16, so as to gain some space. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.457320003@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e22ab47..f9d5cf0 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -507,10 +507,10 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); -#define MAX_STACK_DEPTH 254 +#define MAX_STACK_DEPTH 255 struct perf_callchain_entry { - u32 nr, hv, kernel, user; + u16 nr, hv, kernel, user; u64 ip[MAX_STACK_DEPTH]; }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2a5d4f5..727624d 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1657,9 +1657,7 @@ void perf_counter_do_pending(void) * Callchain support -- arch specific */ -struct perf_callchain_entry * -__attribute__((weak)) -perf_callchain(struct pt_regs *regs) +__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) { return NULL; } @@ -1819,7 +1817,7 @@ void perf_counter_output(struct perf_counter *counter, callchain = perf_callchain(regs); if (callchain) { - callchain_size = (2 + callchain->nr) * sizeof(u64); + callchain_size = (1 + callchain->nr) * sizeof(u64); header.type |= __PERF_EVENT_CALLCHAIN; header.size += callchain_size; -- cgit v0.10.2 From 3c446b3d3b38f991f97e9d2df0ad26a60a94dcff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:01 +0200 Subject: perf_counter: SIGIO support Provide support for fcntl() I/O availability signals. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.579788800@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index f9d5cf0..8d5d11b 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -238,6 +238,7 @@ enum perf_event_type { #include #include #include +#include #include struct task_struct; @@ -398,6 +399,7 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; + struct fasync_struct *fasync; /* optional: for NMIs */ struct perf_wakeup_entry wakeup; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 727624d..c58cc64 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1526,6 +1526,22 @@ out: return ret; } +static int perf_fasync(int fd, struct file *filp, int on) +{ + struct perf_counter *counter = filp->private_data; + struct inode *inode = filp->f_path.dentry->d_inode; + int retval; + + mutex_lock(&inode->i_mutex); + retval = fasync_helper(fd, filp, on, &counter->fasync); + mutex_unlock(&inode->i_mutex); + + if (retval < 0) + return retval; + + return 0; +} + static const struct file_operations perf_fops = { .release = perf_release, .read = perf_read, @@ -1533,6 +1549,7 @@ static const struct file_operations perf_fops = { .unlocked_ioctl = perf_ioctl, .compat_ioctl = perf_ioctl, .mmap = perf_mmap, + .fasync = perf_fasync, }; /* @@ -1549,7 +1566,7 @@ void perf_counter_wakeup(struct perf_counter *counter) rcu_read_lock(); data = rcu_dereference(counter->data); if (data) { - (void)atomic_xchg(&data->wakeup, POLL_IN); + atomic_set(&data->wakeup, POLL_IN); /* * Ensure all data writes are issued before updating the * user-space data head information. The matching rmb() @@ -1561,6 +1578,7 @@ void perf_counter_wakeup(struct perf_counter *counter) rcu_read_unlock(); wake_up_all(&counter->waitq); + kill_fasync(&counter->fasync, SIGIO, POLL_IN); } /* -- cgit v0.10.2 From 671dec5daf3b3c43c5777be282f00120a44cf37f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:02 +0200 Subject: perf_counter: generalize pending infrastructure Prepare the pending infrastructure to do more than wakeups. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.634732847@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8d5d11b..977fb15 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -321,8 +321,9 @@ struct perf_mmap_data { void *data_pages[0]; }; -struct perf_wakeup_entry { - struct perf_wakeup_entry *next; +struct perf_pending_entry { + struct perf_pending_entry *next; + void (*func)(struct perf_pending_entry *); }; /** @@ -401,7 +402,7 @@ struct perf_counter { wait_queue_head_t waitq; struct fasync_struct *fasync; /* optional: for NMIs */ - struct perf_wakeup_entry wakeup; + struct perf_pending_entry pending; void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c58cc64..0a2ade2 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1581,6 +1581,14 @@ void perf_counter_wakeup(struct perf_counter *counter) kill_fasync(&counter->fasync, SIGIO, POLL_IN); } +static void perf_pending_wakeup(struct perf_pending_entry *entry) +{ + struct perf_counter *counter = container_of(entry, + struct perf_counter, pending); + + perf_counter_wakeup(counter); +} + /* * Pending wakeups * @@ -1590,45 +1598,47 @@ void perf_counter_wakeup(struct perf_counter *counter) * single linked list and use cmpxchg() to add entries lockless. */ -#define PENDING_TAIL ((struct perf_wakeup_entry *)-1UL) +#define PENDING_TAIL ((struct perf_pending_entry *)-1UL) -static DEFINE_PER_CPU(struct perf_wakeup_entry *, perf_wakeup_head) = { +static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { PENDING_TAIL, }; -static void perf_pending_queue(struct perf_counter *counter) +static void perf_pending_queue(struct perf_pending_entry *entry, + void (*func)(struct perf_pending_entry *)) { - struct perf_wakeup_entry **head; - struct perf_wakeup_entry *prev, *next; + struct perf_pending_entry **head; - if (cmpxchg(&counter->wakeup.next, NULL, PENDING_TAIL) != NULL) + if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) return; - head = &get_cpu_var(perf_wakeup_head); + entry->func = func; + + head = &get_cpu_var(perf_pending_head); do { - prev = counter->wakeup.next = *head; - next = &counter->wakeup; - } while (cmpxchg(head, prev, next) != prev); + entry->next = *head; + } while (cmpxchg(head, entry->next, entry) != entry->next); set_perf_counter_pending(); - put_cpu_var(perf_wakeup_head); + put_cpu_var(perf_pending_head); } static int __perf_pending_run(void) { - struct perf_wakeup_entry *list; + struct perf_pending_entry *list; int nr = 0; - list = xchg(&__get_cpu_var(perf_wakeup_head), PENDING_TAIL); + list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); while (list != PENDING_TAIL) { - struct perf_counter *counter = container_of(list, - struct perf_counter, wakeup); + void (*func)(struct perf_pending_entry *); + struct perf_pending_entry *entry = list; list = list->next; - counter->wakeup.next = NULL; + func = entry->func; + entry->next = NULL; /* * Ensure we observe the unqueue before we issue the wakeup, * so that we won't be waiting forever. @@ -1636,7 +1646,7 @@ static int __perf_pending_run(void) */ smp_wmb(); - perf_counter_wakeup(counter); + func(entry); nr++; } @@ -1658,7 +1668,7 @@ static inline int perf_not_pending(struct perf_counter *counter) * so that we do not miss the wakeup. -- see perf_pending_handle() */ smp_rmb(); - return counter->wakeup.next == NULL; + return counter->pending.next == NULL; } static void perf_pending_sync(struct perf_counter *counter) @@ -1695,9 +1705,10 @@ struct perf_output_handle { static inline void __perf_output_wakeup(struct perf_output_handle *handle) { - if (handle->nmi) - perf_pending_queue(handle->counter); - else + if (handle->nmi) { + perf_pending_queue(&handle->counter->pending, + perf_pending_wakeup); + } else perf_counter_wakeup(handle->counter); } -- cgit v0.10.2 From b6276f353bf490add62dcf7db0ebd75baa3e1a37 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:03 +0200 Subject: perf_counter: x86: self-IPI for pending work Implement set_perf_counter_pending() with a self-IPI so that it will run ASAP in a usable context. For now use a second IRQ vector, because the primary vector pokes the apic in funny ways that seem to confuse things. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.724626696@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bed..fe24d28 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h @@ -50,6 +50,7 @@ BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) #ifdef CONFIG_PERF_COUNTERS BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) +BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) #endif #ifdef CONFIG_X86_MCE_P4THERMAL diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 2545442..f5ebe2a 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -14,6 +14,7 @@ typedef struct { #endif unsigned int generic_irqs; /* arch dependent */ unsigned int apic_perf_irqs; + unsigned int apic_pending_irqs; #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ae80f64..7309c0a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -30,6 +30,7 @@ extern void apic_timer_interrupt(void); extern void generic_interrupt(void); extern void error_interrupt(void); extern void perf_counter_interrupt(void); +extern void perf_pending_interrupt(void); extern void spurious_interrupt(void); extern void thermal_interrupt(void); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79b..545bb81 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -117,6 +117,11 @@ #define GENERIC_INTERRUPT_VECTOR 0xed /* + * Performance monitoring pending work vector: + */ +#define LOCAL_PENDING_VECTOR 0xec + +/* * First APIC vector available to drivers: (vectors 0x30-0xee) we * start at 0x31(0x41) to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index e2b0e66..d08dd52 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,7 +84,8 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) -#define set_perf_counter_pending() do { } while (0) +extern void set_perf_counter_pending(void); + #define clear_perf_counter_pending() do { } while (0) #define test_perf_counter_pending() (0) diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index c74e20d..4384158 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -849,6 +849,20 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_exit(); } +void smp_perf_pending_interrupt(struct pt_regs *regs) +{ + irq_enter(); + ack_APIC_irq(); + inc_irq_stat(apic_pending_irqs); + perf_counter_do_pending(); + irq_exit(); +} + +void set_perf_counter_pending(void) +{ + apic->send_IPI_self(LOCAL_PENDING_VECTOR); +} + void perf_counters_lapic_init(int nmi) { u32 apic_val; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 3f129d9..1d46cba 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1028,6 +1028,8 @@ apicinterrupt SPURIOUS_APIC_VECTOR \ #ifdef CONFIG_PERF_COUNTERS apicinterrupt LOCAL_PERF_VECTOR \ perf_counter_interrupt smp_perf_counter_interrupt +apicinterrupt LOCAL_PENDING_VECTOR \ + perf_pending_interrupt smp_perf_pending_interrupt #endif /* diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9c27543..d465487 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -67,6 +67,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, " Performance counter interrupts\n"); + seq_printf(p, "PND: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); + seq_printf(p, " Performance pending work\n"); #endif if (generic_interrupt_extension) { seq_printf(p, "PLT: "); @@ -171,6 +175,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; + sum += irq_stats(cpu)->apic_pending_irqs; #endif if (generic_interrupt_extension) sum += irq_stats(cpu)->generic_irqs; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 925d87c..3190a6b 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -166,6 +166,7 @@ static void __init apic_intr_init(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); # ifdef CONFIG_PERF_COUNTERS alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); # endif # ifdef CONFIG_X86_MCE_P4THERMAL diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 665e2ab..53ceb26 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -156,6 +156,7 @@ static void __init apic_intr_init(void) /* Performance monitoring interrupt: */ #ifdef CONFIG_PERF_COUNTERS alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); #endif } -- cgit v0.10.2 From f6c7d5fe58b4846ee0cb4b98b6042489705eced4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:04 +0200 Subject: perf_counter: theres more to overflow than writing events Prepare for more generic overflow handling. The new perf_counter_overflow() method will handle the generic bits of the counter overflow, and can return a !0 return value, in which case the counter should be (soft) disabled, so that it won't count until it's properly disabled. XXX: do powerpc and swcounter Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.812109629@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0a4d14f..f88c35d 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -732,7 +732,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_output(counter, 1, regs); + perf_counter_overflow(counter, 1, regs); } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 4384158..1116a41 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -800,7 +800,8 @@ again: continue; perf_save_and_restart(counter); - perf_counter_output(counter, nmi, regs); + if (perf_counter_overflow(counter, nmi, regs)) + __pmc_generic_disable(counter, &counter->hw, bit); } hw_perf_ack_status(ack); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 977fb15..ca2d4df 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -491,8 +491,8 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, struct perf_counter_context *ctx, int cpu); extern void perf_counter_update_userpage(struct perf_counter *counter); -extern void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs); +extern int perf_counter_overflow(struct perf_counter *counter, + int nmi, struct pt_regs *regs); /* * Return 1 for a software counter, 0 for a hardware counter */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0a2ade2..195e976 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1800,8 +1800,8 @@ static void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) +static void perf_counter_output(struct perf_counter *counter, + int nmi, struct pt_regs *regs) { int ret; u64 record_type = counter->hw_event.record_type; @@ -2034,6 +2034,17 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, } /* + * Generic counter overflow handling. + */ + +int perf_counter_overflow(struct perf_counter *counter, + int nmi, struct pt_regs *regs) +{ + perf_counter_output(counter, nmi, regs); + return 0; +} + +/* * Generic software counter infrastructure */ @@ -2077,6 +2088,7 @@ static void perf_swcounter_set_period(struct perf_counter *counter) static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) { + enum hrtimer_restart ret = HRTIMER_RESTART; struct perf_counter *counter; struct pt_regs *regs; @@ -2092,12 +2104,14 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) !counter->hw_event.exclude_user) regs = task_pt_regs(current); - if (regs) - perf_counter_output(counter, 0, regs); + if (regs) { + if (perf_counter_overflow(counter, 0, regs)) + ret = HRTIMER_NORESTART; + } hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); - return HRTIMER_RESTART; + return ret; } static void perf_swcounter_overflow(struct perf_counter *counter, @@ -2105,7 +2119,10 @@ static void perf_swcounter_overflow(struct perf_counter *counter, { perf_swcounter_update(counter); perf_swcounter_set_period(counter); - perf_counter_output(counter, nmi, regs); + if (perf_counter_overflow(counter, nmi, regs)) + /* soft-disable the counter */ + ; + } static int perf_swcounter_match(struct perf_counter *counter, -- cgit v0.10.2 From ebb3c4c4cb81d64cc041356915ec015e2c57092a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:05 +0200 Subject: perf_counter: fix the mlock accounting Reading through the code I saw I forgot the finish the mlock accounting. Do so now. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094517.899767331@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 195e976..c841563 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1461,13 +1461,14 @@ static void perf_mmap_close(struct vm_area_struct *vma) if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { + vma->vm_mm->locked_vm -= counter->data->nr_pages + 1; perf_mmap_data_free(counter); mutex_unlock(&counter->mmap_mutex); } } static struct vm_operations_struct perf_mmap_vmops = { - .open = perf_mmap_open, + .open = perf_mmap_open, .close = perf_mmap_close, .fault = perf_mmap_fault, }; @@ -1499,24 +1500,32 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_pgoff != 0) return -EINVAL; - locked = vma_size >> PAGE_SHIFT; - locked += vma->vm_mm->locked_vm; + mutex_lock(&counter->mmap_mutex); + if (atomic_inc_not_zero(&counter->mmap_count)) { + if (nr_pages != counter->data->nr_pages) + ret = -EINVAL; + goto unlock; + } + + locked = vma->vm_mm->locked_vm; + locked += nr_pages + 1; lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; lock_limit >>= PAGE_SHIFT; - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) - return -EPERM; - - mutex_lock(&counter->mmap_mutex); - if (atomic_inc_not_zero(&counter->mmap_count)) - goto out; + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + ret = -EPERM; + goto unlock; + } WARN_ON(counter->data); ret = perf_mmap_data_alloc(counter, nr_pages); - if (!ret) - atomic_set(&counter->mmap_count, 1); -out: + if (ret) + goto unlock; + + atomic_set(&counter->mmap_count, 1); + vma->vm_mm->locked_vm += nr_pages + 1; +unlock: mutex_unlock(&counter->mmap_mutex); vma->vm_flags &= ~VM_MAYWRITE; -- cgit v0.10.2 From 339f7c90b8a2f3aa2dd4267e79f797999e8a3c59 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:06 +0200 Subject: perf_counter: PERF_RECORD_TIME By popular request, provide means to log a timestamp along with the counter overflow event. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.024173282@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ca2d4df..928a7fa 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -102,6 +102,7 @@ enum perf_counter_record_format { PERF_RECORD_TID = 1U << 1, PERF_RECORD_GROUP = 1U << 2, PERF_RECORD_CALLCHAIN = 1U << 3, + PERF_RECORD_TIME = 1U << 4, }; /* @@ -221,6 +222,7 @@ enum perf_event_type { __PERF_EVENT_TID = PERF_RECORD_TID, __PERF_EVENT_GROUP = PERF_RECORD_GROUP, __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, + __PERF_EVENT_TIME = PERF_RECORD_TIME, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c841563..19990d1 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1826,6 +1826,7 @@ static void perf_counter_output(struct perf_counter *counter, } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; + u64 time; header.type = PERF_EVENT_COUNTER_OVERFLOW; header.size = sizeof(header); @@ -1862,6 +1863,16 @@ static void perf_counter_output(struct perf_counter *counter, } } + if (record_type & PERF_RECORD_TIME) { + /* + * Maybe do better on x86 and provide cpu_clock_nmi() + */ + time = sched_clock(); + + header.type |= __PERF_EVENT_TIME; + header.size += sizeof(u64); + } + ret = perf_output_begin(&handle, counter, header.size, nmi); if (ret) return; @@ -1895,6 +1906,9 @@ static void perf_counter_output(struct perf_counter *counter, if (callchain) perf_output_copy(&handle, callchain, callchain_size); + if (record_type & PERF_RECORD_TIME) + perf_output_put(&handle, time); + perf_output_end(&handle); } -- cgit v0.10.2 From 79f146415623fe74f39af67c0f6adc208939a410 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:07 +0200 Subject: perf_counter: counter overflow limit Provide means to auto-disable the counter after 'n' overflow events. Create the counter with hw_event.disabled = 1, and then issue an ioctl(fd, PREF_COUNTER_IOC_REFRESH, n); to set the limit and enable the counter. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.083139737@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 928a7fa..ef4dcbf 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -155,8 +155,9 @@ struct perf_counter_hw_event { /* * Ioctls that can be done on a perf counter fd: */ -#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) +#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) +#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) +#define PERF_COUNTER_IOC_REFRESH _IOW('$', 2, u32) /* * Structure of the page that can be mapped via mmap @@ -403,9 +404,14 @@ struct perf_counter { /* poll related */ wait_queue_head_t waitq; struct fasync_struct *fasync; - /* optional: for NMIs */ + + /* delayed work for NMIs and such */ + int pending_wakeup; + int pending_disable; struct perf_pending_entry pending; + atomic_t event_limit; + void (*destroy)(struct perf_counter *); struct rcu_head rcu_head; #endif diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 19990d1..c05e103 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -744,6 +744,12 @@ static void perf_counter_enable(struct perf_counter *counter) spin_unlock_irq(&ctx->lock); } +static void perf_counter_refresh(struct perf_counter *counter, int refresh) +{ + atomic_add(refresh, &counter->event_limit); + perf_counter_enable(counter); +} + /* * Enable a counter and all its children. */ @@ -1311,6 +1317,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_COUNTER_IOC_DISABLE: perf_counter_disable_family(counter); break; + case PERF_COUNTER_IOC_REFRESH: + perf_counter_refresh(counter, arg); + break; default: err = -ENOTTY; } @@ -1590,14 +1599,6 @@ void perf_counter_wakeup(struct perf_counter *counter) kill_fasync(&counter->fasync, SIGIO, POLL_IN); } -static void perf_pending_wakeup(struct perf_pending_entry *entry) -{ - struct perf_counter *counter = container_of(entry, - struct perf_counter, pending); - - perf_counter_wakeup(counter); -} - /* * Pending wakeups * @@ -1607,6 +1608,22 @@ static void perf_pending_wakeup(struct perf_pending_entry *entry) * single linked list and use cmpxchg() to add entries lockless. */ +static void perf_pending_counter(struct perf_pending_entry *entry) +{ + struct perf_counter *counter = container_of(entry, + struct perf_counter, pending); + + if (counter->pending_disable) { + counter->pending_disable = 0; + perf_counter_disable(counter); + } + + if (counter->pending_wakeup) { + counter->pending_wakeup = 0; + perf_counter_wakeup(counter); + } +} + #define PENDING_TAIL ((struct perf_pending_entry *)-1UL) static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { @@ -1715,8 +1732,9 @@ struct perf_output_handle { static inline void __perf_output_wakeup(struct perf_output_handle *handle) { if (handle->nmi) { + handle->counter->pending_wakeup = 1; perf_pending_queue(&handle->counter->pending, - perf_pending_wakeup); + perf_pending_counter); } else perf_counter_wakeup(handle->counter); } @@ -2063,8 +2081,21 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, int perf_counter_overflow(struct perf_counter *counter, int nmi, struct pt_regs *regs) { + int events = atomic_read(&counter->event_limit); + int ret = 0; + + if (events && atomic_dec_and_test(&counter->event_limit)) { + ret = 1; + if (nmi) { + counter->pending_disable = 1; + perf_pending_queue(&counter->pending, + perf_pending_counter); + } else + perf_counter_disable(counter); + } + perf_counter_output(counter, nmi, regs); - return 0; + return ret; } /* -- cgit v0.10.2 From 0c593b3411341e3a05a61f5527df36ab02bd11e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:08 +0200 Subject: perf_counter: comment the perf_event_type stuff Describe the event format. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.211174347@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index ef4dcbf..81220188 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -208,6 +208,20 @@ struct perf_event_header { enum perf_event_type { + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * }; + */ PERF_EVENT_MMAP = 1, PERF_EVENT_MUNMAP = 2, @@ -217,6 +231,24 @@ enum perf_event_type { * * These events will have types of the form: * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + * + * struct { + * struct perf_event_header header; + * + * { u64 ip; } && __PERF_EVENT_IP + * { u32 pid, tid; } && __PERF_EVENT_TID + * + * { u64 nr; + * { u64 event, val; } cnt[nr]; } && __PERF_EVENT_GROUP + * + * { u16 nr, + * hv, + * kernel, + * user; + * u64 ips[nr]; } && __PERF_EVENT_CALLCHAIN + * + * { u64 time; } && __PERF_EVENT_TIME + * }; */ PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, __PERF_EVENT_IP = PERF_RECORD_IP, -- cgit v0.10.2 From 4c9e25428ff46b968a30f1dfafdba550cb6e4141 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:09 +0200 Subject: perf_counter: change event definition Currently the definition of an event is slightly ambiguous. We have wakeup events, for poll() and SIGIO, which are either generated when a record crosses a page boundary (hw_events.wakeup_events == 0), or every wakeup_events new records. Now a record can be either a counter overflow record, or a number of different things, like the mmap PROT_EXEC region notifications. Then there is the PERF_COUNTER_IOC_REFRESH event limit, which only considers counter overflows. This patch changes then wakeup_events and SIGIO notification to only consider overflow events. Furthermore it changes the SIGIO notification to report SIGHUP when the event limit is reached and the counter will be disabled. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.266679874@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 81220188..0f5a400 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -439,6 +439,7 @@ struct perf_counter { /* delayed work for NMIs and such */ int pending_wakeup; + int pending_kill; int pending_disable; struct perf_pending_entry pending; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c05e103..8c8eaf0 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1596,7 +1596,11 @@ void perf_counter_wakeup(struct perf_counter *counter) rcu_read_unlock(); wake_up_all(&counter->waitq); - kill_fasync(&counter->fasync, SIGIO, POLL_IN); + + if (counter->pending_kill) { + kill_fasync(&counter->fasync, SIGIO, counter->pending_kill); + counter->pending_kill = 0; + } } /* @@ -1727,6 +1731,7 @@ struct perf_output_handle { unsigned int head; int wakeup; int nmi; + int overflow; }; static inline void __perf_output_wakeup(struct perf_output_handle *handle) @@ -1741,7 +1746,7 @@ static inline void __perf_output_wakeup(struct perf_output_handle *handle) static int perf_output_begin(struct perf_output_handle *handle, struct perf_counter *counter, unsigned int size, - int nmi) + int nmi, int overflow) { struct perf_mmap_data *data; unsigned int offset, head; @@ -1751,8 +1756,9 @@ static int perf_output_begin(struct perf_output_handle *handle, if (!data) goto out; - handle->counter = counter; - handle->nmi = nmi; + handle->counter = counter; + handle->nmi = nmi; + handle->overflow = overflow; if (!data->nr_pages) goto fail; @@ -1816,7 +1822,7 @@ static void perf_output_end(struct perf_output_handle *handle) { int wakeup_events = handle->counter->hw_event.wakeup_events; - if (wakeup_events) { + if (handle->overflow && wakeup_events) { int events = atomic_inc_return(&handle->data->events); if (events >= wakeup_events) { atomic_sub(wakeup_events, &handle->data->events); @@ -1891,7 +1897,7 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(u64); } - ret = perf_output_begin(&handle, counter, header.size, nmi); + ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -1955,7 +1961,7 @@ static void perf_counter_mmap_output(struct perf_counter *counter, { struct perf_output_handle handle; int size = mmap_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0); + int ret = perf_output_begin(&handle, counter, size, 0, 0); if (ret) return; @@ -2084,8 +2090,10 @@ int perf_counter_overflow(struct perf_counter *counter, int events = atomic_read(&counter->event_limit); int ret = 0; + counter->pending_kill = POLL_IN; if (events && atomic_dec_and_test(&counter->event_limit)) { ret = 1; + counter->pending_kill = POLL_HUP; if (nmi) { counter->pending_disable = 1; perf_pending_queue(&counter->pending, -- cgit v0.10.2 From 4af4998b8aa35600f4c4a4f3c3a23baca6081d02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:10 +0200 Subject: perf_counter: rework context time Since perf_counter_context is switched along with tasks, we can maintain the context time without using the task runtime clock. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.353552838@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 0f5a400..7f5d353 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -477,14 +477,10 @@ struct perf_counter_context { struct task_struct *task; /* - * time_now is the current time in nanoseconds since an arbitrary - * point in the past. For per-task counters, this is based on the - * task clock, and for per-cpu counters it is based on the cpu clock. - * time_lost is an offset from the task/cpu clock, used to make it - * appear that time only passes while the context is scheduled in. + * Context clock, runs when context enabled. */ - u64 time_now; - u64 time_lost; + u64 time; + u64 timestamp; #endif }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 8c8eaf0..84d85ab 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -117,7 +117,7 @@ counter_sched_out(struct perf_counter *counter, return; counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_stopped = ctx->time_now; + counter->tstamp_stopped = ctx->time; counter->hw_ops->disable(counter); counter->oncpu = -1; @@ -253,27 +253,20 @@ retry: spin_unlock_irq(&ctx->lock); } -/* - * Get the current time for this context. - * If this is a task context, we use the task's task clock, - * or for a per-cpu context, we use the cpu clock. - */ -static u64 get_context_time(struct perf_counter_context *ctx, int update) +static inline u64 perf_clock(void) { - struct task_struct *curr = ctx->task; - - if (!curr) - return cpu_clock(smp_processor_id()); - - return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime; + return cpu_clock(smp_processor_id()); } /* * Update the record of the current time in a context. */ -static void update_context_time(struct perf_counter_context *ctx, int update) +static void update_context_time(struct perf_counter_context *ctx) { - ctx->time_now = get_context_time(ctx, update) - ctx->time_lost; + u64 now = perf_clock(); + + ctx->time += now - ctx->timestamp; + ctx->timestamp = now; } /* @@ -284,15 +277,17 @@ static void update_counter_times(struct perf_counter *counter) struct perf_counter_context *ctx = counter->ctx; u64 run_end; - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { - counter->total_time_enabled = ctx->time_now - - counter->tstamp_enabled; - if (counter->state == PERF_COUNTER_STATE_INACTIVE) - run_end = counter->tstamp_stopped; - else - run_end = ctx->time_now; - counter->total_time_running = run_end - counter->tstamp_running; - } + if (counter->state < PERF_COUNTER_STATE_INACTIVE) + return; + + counter->total_time_enabled = ctx->time - counter->tstamp_enabled; + + if (counter->state == PERF_COUNTER_STATE_INACTIVE) + run_end = counter->tstamp_stopped; + else + run_end = ctx->time; + + counter->total_time_running = run_end - counter->tstamp_running; } /* @@ -332,7 +327,7 @@ static void __perf_counter_disable(void *info) * If it is in error state, leave it in error state. */ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { - update_context_time(ctx, 1); + update_context_time(ctx); update_counter_times(counter); if (counter == counter->group_leader) group_sched_out(counter, cpuctx, ctx); @@ -426,7 +421,7 @@ counter_sched_in(struct perf_counter *counter, return -EAGAIN; } - counter->tstamp_running += ctx->time_now - counter->tstamp_stopped; + counter->tstamp_running += ctx->time - counter->tstamp_stopped; if (!is_software_counter(counter)) cpuctx->active_oncpu++; @@ -493,9 +488,9 @@ static void add_counter_to_ctx(struct perf_counter *counter, list_add_counter(counter, ctx); ctx->nr_counters++; counter->prev_state = PERF_COUNTER_STATE_OFF; - counter->tstamp_enabled = ctx->time_now; - counter->tstamp_running = ctx->time_now; - counter->tstamp_stopped = ctx->time_now; + counter->tstamp_enabled = ctx->time; + counter->tstamp_running = ctx->time; + counter->tstamp_stopped = ctx->time; } /* @@ -522,7 +517,7 @@ static void __perf_install_in_context(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); - update_context_time(ctx, 1); + update_context_time(ctx); /* * Protect the list operation against NMI by disabling the @@ -648,13 +643,13 @@ static void __perf_counter_enable(void *info) curr_rq_lock_irq_save(&flags); spin_lock(&ctx->lock); - update_context_time(ctx, 1); + update_context_time(ctx); counter->prev_state = counter->state; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled; + counter->tstamp_enabled = ctx->time - counter->total_time_enabled; /* * If the counter is in a group and isn't the group leader, @@ -737,8 +732,8 @@ static void perf_counter_enable(struct perf_counter *counter) */ if (counter->state == PERF_COUNTER_STATE_OFF) { counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time_now - - counter->total_time_enabled; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; } out: spin_unlock_irq(&ctx->lock); @@ -778,7 +773,7 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx, ctx->is_active = 0; if (likely(!ctx->nr_counters)) goto out; - update_context_time(ctx, 0); + update_context_time(ctx); flags = hw_perf_save_disable(); if (ctx->nr_active) { @@ -883,12 +878,7 @@ __perf_counter_sched_in(struct perf_counter_context *ctx, if (likely(!ctx->nr_counters)) goto out; - /* - * Add any time since the last sched_out to the lost time - * so it doesn't get included in the total_time_enabled and - * total_time_running measures for counters in the context. - */ - ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now; + ctx->timestamp = perf_clock(); flags = hw_perf_save_disable(); @@ -1043,8 +1033,8 @@ int perf_counter_task_enable(void) if (counter->state > PERF_COUNTER_STATE_OFF) continue; counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time_now - - counter->total_time_enabled; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; counter->hw_event.disabled = 0; } hw_perf_restore(perf_flags); @@ -1113,7 +1103,7 @@ static void __read(void *info) curr_rq_lock_irq_save(&flags); if (ctx->is_active) - update_context_time(ctx, 1); + update_context_time(ctx); counter->hw_ops->read(counter); update_counter_times(counter); curr_rq_unlock_irq_restore(&flags); -- cgit v0.10.2 From a39d6f2556c4a19f58f538c6aa28bf8faca4fcb8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:11 +0200 Subject: perf_counter: rework the task clock software counter Rework the task clock software counter to use the context time instead of the task runtime clock, this removes the last such user. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.445450972@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 84d85ab..56b7eb5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -974,9 +974,6 @@ int perf_counter_task_disable(void) curr_rq_lock_irq_save(&flags); cpu = smp_processor_id(); - /* force the update of the task clock: */ - __task_delta_exec(curr, 1); - perf_counter_task_sched_out(curr, cpu); spin_lock(&ctx->lock); @@ -1017,9 +1014,6 @@ int perf_counter_task_enable(void) curr_rq_lock_irq_save(&flags); cpu = smp_processor_id(); - /* force the update of the task clock: */ - __task_delta_exec(curr, 1); - perf_counter_task_sched_out(curr, cpu); spin_lock(&ctx->lock); @@ -2347,38 +2341,28 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { * Software counter: task time clock */ -/* - * Called from within the scheduler: - */ -static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update) -{ - struct task_struct *curr = counter->task; - u64 delta; - - delta = __task_delta_exec(curr, update); - - return curr->se.sum_exec_runtime + delta; -} - -static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) +static void task_clock_perf_counter_update(struct perf_counter *counter) { - u64 prev; + u64 prev, now; s64 delta; - prev = atomic64_read(&counter->hw.prev_count); - - atomic64_set(&counter->hw.prev_count, now); + update_context_time(counter->ctx); + now = counter->ctx->time; + prev = atomic64_xchg(&counter->hw.prev_count, now); delta = now - prev; - atomic64_add(delta, &counter->count); } static int task_clock_perf_counter_enable(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; + u64 now; + + update_context_time(counter->ctx); + now = counter->ctx->time; - atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0)); + atomic64_set(&hwc->prev_count, now); hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swcounter_hrtimer; if (hwc->irq_period) { @@ -2393,14 +2377,12 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) static void task_clock_perf_counter_disable(struct perf_counter *counter) { hrtimer_cancel(&counter->hw.hrtimer); - task_clock_perf_counter_update(counter, - task_clock_perf_counter_val(counter, 0)); + task_clock_perf_counter_update(counter); } static void task_clock_perf_counter_read(struct perf_counter *counter) { - task_clock_perf_counter_update(counter, - task_clock_perf_counter_val(counter, 1)); + task_clock_perf_counter_update(counter); } static const struct hw_perf_counter_ops perf_ops_task_clock = { -- cgit v0.10.2 From 849691a6cd40270ff5f4a8846d5f6bf8df663ffc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:12 +0200 Subject: perf_counter: remove rq->lock usage Now that all the task runtime clock users are gone, remove the ugly rq->lock usage from perf counters, which solves the nasty deadlock seen when a software task clock counter was read from an NMI overflow context. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.531137582@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index b6d2887..080d1fd 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -85,8 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq) /* * Lock/unlock the current runqueue - to extract task statistics: */ -extern void curr_rq_lock_irq_save(unsigned long *flags); -extern void curr_rq_unlock_irq_restore(unsigned long *flags); extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 56b7eb5..f4f7596 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -172,8 +172,7 @@ static void __perf_counter_remove_from_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); counter_sched_out(counter, cpuctx, ctx); @@ -198,8 +197,7 @@ static void __perf_counter_remove_from_context(void *info) perf_max_counters - perf_reserved_percpu); } - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } @@ -319,8 +317,7 @@ static void __perf_counter_disable(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); /* * If the counter is on, turn it off. @@ -336,8 +333,7 @@ static void __perf_counter_disable(void *info) counter->state = PERF_COUNTER_STATE_OFF; } - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -515,8 +511,7 @@ static void __perf_install_in_context(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); update_context_time(ctx); /* @@ -565,8 +560,7 @@ static void __perf_install_in_context(void *info) unlock: hw_perf_restore(perf_flags); - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -641,8 +635,7 @@ static void __perf_counter_enable(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - curr_rq_lock_irq_save(&flags); - spin_lock(&ctx->lock); + spin_lock_irqsave(&ctx->lock, flags); update_context_time(ctx); counter->prev_state = counter->state; @@ -678,8 +671,7 @@ static void __perf_counter_enable(void *info) } unlock: - spin_unlock(&ctx->lock); - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); } /* @@ -971,7 +963,7 @@ int perf_counter_task_disable(void) if (likely(!ctx->nr_counters)) return 0; - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); cpu = smp_processor_id(); perf_counter_task_sched_out(curr, cpu); @@ -992,9 +984,7 @@ int perf_counter_task_disable(void) hw_perf_restore(perf_flags); - spin_unlock(&ctx->lock); - - curr_rq_unlock_irq_restore(&flags); + spin_unlock_irqrestore(&ctx->lock, flags); return 0; } @@ -1011,7 +1001,7 @@ int perf_counter_task_enable(void) if (likely(!ctx->nr_counters)) return 0; - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); cpu = smp_processor_id(); perf_counter_task_sched_out(curr, cpu); @@ -1037,7 +1027,7 @@ int perf_counter_task_enable(void) perf_counter_task_sched_in(curr, cpu); - curr_rq_unlock_irq_restore(&flags); + local_irq_restore(flags); return 0; } @@ -1095,12 +1085,12 @@ static void __read(void *info) struct perf_counter_context *ctx = counter->ctx; unsigned long flags; - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); if (ctx->is_active) update_context_time(ctx); counter->hw_ops->read(counter); update_counter_times(counter); - curr_rq_unlock_irq_restore(&flags); + local_irq_restore(flags); } static u64 perf_counter_read(struct perf_counter *counter) @@ -2890,7 +2880,7 @@ __perf_counter_exit_task(struct task_struct *child, * Be careful about zapping the list - IRQ/NMI context * could still be processing it: */ - curr_rq_lock_irq_save(&flags); + local_irq_save(flags); perf_flags = hw_perf_save_disable(); cpuctx = &__get_cpu_var(perf_cpu_context); @@ -2903,7 +2893,7 @@ __perf_counter_exit_task(struct task_struct *child, child_ctx->nr_counters--; hw_perf_restore(perf_flags); - curr_rq_unlock_irq_restore(&flags); + local_irq_restore(flags); } parent_counter = child_counter->parent; diff --git a/kernel/sched.c b/kernel/sched.c index f76e3c0..0de2f81 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -997,26 +997,6 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) } } -void curr_rq_lock_irq_save(unsigned long *flags) - __acquires(rq->lock) -{ - struct rq *rq; - - local_irq_save(*flags); - rq = cpu_rq(smp_processor_id()); - spin_lock(&rq->lock); -} - -void curr_rq_unlock_irq_restore(unsigned long *flags) - __releases(rq->lock) -{ - struct rq *rq; - - rq = cpu_rq(smp_processor_id()); - spin_unlock(&rq->lock); - local_irq_restore(*flags); -} - void task_rq_unlock_wait(struct task_struct *p) { struct rq *rq = task_rq(p); -- cgit v0.10.2 From bce379bf358738ab8efc8cda2594a299ac685085 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 6 Apr 2009 11:45:13 +0200 Subject: perf_counter: minimize context time updates Push the update_context_time() calls up the stack so that we get less invokations and thereby a less noisy output: before: # ./perfstat -e 1:0 -e 1:1 -e 1:1 -e 1:1 -l ls > /dev/null Performance counter stats for 'ls': 10.163691 cpu clock ticks (msecs) (scaled from 98.94%) 10.215360 task clock ticks (msecs) (scaled from 98.18%) 10.185549 task clock ticks (msecs) (scaled from 98.53%) 10.183581 task clock ticks (msecs) (scaled from 98.71%) Wall-clock time elapsed: 11.912858 msecs after: # ./perfstat -e 1:0 -e 1:1 -e 1:1 -e 1:1 -l ls > /dev/null Performance counter stats for 'ls': 9.316630 cpu clock ticks (msecs) 9.280789 task clock ticks (msecs) 9.280789 task clock ticks (msecs) 9.280789 task clock ticks (msecs) Wall-clock time elapsed: 9.574872 msecs Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090406094518.618876874@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index f4f7596..863703b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -319,6 +319,8 @@ static void __perf_counter_disable(void *info) spin_lock_irqsave(&ctx->lock, flags); + update_context_time(ctx); + /* * If the counter is on, turn it off. * If it is in error state, leave it in error state. @@ -797,6 +799,8 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) if (likely(!cpuctx->task_ctx)) return; + update_context_time(ctx); + regs = task_pt_regs(task); perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); __perf_counter_sched_out(ctx, cpuctx); @@ -2336,7 +2340,6 @@ static void task_clock_perf_counter_update(struct perf_counter *counter) u64 prev, now; s64 delta; - update_context_time(counter->ctx); now = counter->ctx->time; prev = atomic64_xchg(&counter->hw.prev_count, now); @@ -2349,7 +2352,6 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; u64 now; - update_context_time(counter->ctx); now = counter->ctx->time; atomic64_set(&hwc->prev_count, now); @@ -2372,6 +2374,7 @@ static void task_clock_perf_counter_disable(struct perf_counter *counter) static void task_clock_perf_counter_read(struct perf_counter *counter) { + update_context_time(counter->ctx); task_clock_perf_counter_update(counter); } -- cgit v0.10.2 From 6278af660ff83fbafb18e53fc2747eb2ee6780fa Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 2 Apr 2009 10:40:28 +0200 Subject: perf_counter tools: kerneltop: display per function percentage along with event count ------------------------------------------------------------------------------ KernelTop: 90551 irqs/sec kernel:15.0% [NMI, 100000 CPU cycles], (all, 4 CPUs) ------------------------------------------------------------------------------ events pcnt RIP kernel function ______ ______ _____ ________________ _______________ 16871.00 - 19.1% - ffffffff80328e20 : clear_page_c 8810.00 - 9.9% - ffffffff8048ce80 : page_fault 4746.00 - 5.4% - ffffffff8048cae2 : _spin_lock 4428.00 - 5.0% - ffffffff80328e70 : copy_page_c 3340.00 - 3.8% - ffffffff80329090 : copy_user_generic_string! 2679.00 - 3.0% - ffffffff8028a16b : get_page_from_freelist 2254.00 - 2.5% - ffffffff80296f19 : unmap_vmas 2082.00 - 2.4% - ffffffff80297e19 : handle_mm_fault 1754.00 - 2.0% - ffffffff80288dc8 : __rmqueue_smallest 1553.00 - 1.8% - ffffffff8048ca58 : _spin_lock_irqsave 1400.00 - 1.6% - ffffffff8028cdc8 : release_pages 1337.00 - 1.5% - ffffffff80285400 : find_get_page 1335.00 - 1.5% - ffffffff80225a23 : do_page_fault 1299.00 - 1.5% - ffffffff802ba8e7 : __d_lookup 1174.00 - 1.3% - ffffffff802b38f3 : __link_path_walk 1155.00 - 1.3% - ffffffff802843e1 : perf_swcounter_ctx_event! 1137.00 - 1.3% - ffffffff8028d118 : ____pagevec_lru_add 963.00 - 1.1% - ffffffff802a670b : kmem_cache_alloc 885.00 - 1.0% - ffffffff8024bc61 : __wake_up_bit Display per function percentage along with event count. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 4f8d791..15f3a5f 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -636,16 +636,20 @@ static void print_sym_table(void) int counter; float events_per_sec = events/delay_secs; float kevents_per_sec = (events-userspace_events)/delay_secs; + float sum_kevents = 0.0; events = userspace_events = 0; memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); + for (i = 0; i < sym_table_count && tmp[i].count[0]; i++) + sum_kevents += tmp[i].count[0]; + write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); printf( "------------------------------------------------------------------------------\n"); - printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ", + printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ", events_per_sec, 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), nmi ? "NMI" : "IRQ"); @@ -679,34 +683,31 @@ static void print_sym_table(void) printf("------------------------------------------------------------------------------\n\n"); if (nr_counters == 1) - printf(" events"); + printf(" events pcnt"); else - printf(" weight events"); + printf(" weight events pcnt"); printf(" RIP kernel function\n" - " ______ ______ ________________ _______________\n\n" + " ______ ______ _____ ________________ _______________\n\n" ); - printed = 0; - for (i = 0; i < sym_table_count; i++) { + for (i = 0, printed = 0; i < sym_table_count; i++) { + float pcnt; int count; - if (nr_counters == 1) { - if (printed <= 18 && - tmp[i].count[0] >= count_filter) { - printf("%19.2f - %016llx : %s\n", - sym_weight(tmp + i), tmp[i].addr, tmp[i].sym); - printed++; - } - } else { - if (printed <= 18 && - tmp[i].count[0] >= count_filter) { - printf("%8.1f %10ld - %016llx : %s\n", - sym_weight(tmp + i), - tmp[i].count[0], - tmp[i].addr, tmp[i].sym); - printed++; - } + if (printed <= 18 && tmp[i].count[0] >= count_filter) { + pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents)); + + if (nr_counters == 1) + printf("%19.2f - %4.1f%% - %016llx : %s\n", + sym_weight(tmp + i), + pcnt, tmp[i].addr, tmp[i].sym); + else + printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n", + sym_weight(tmp + i), + tmp[i].count[0], + pcnt, tmp[i].addr, tmp[i].sym); + printed++; } /* * Add decay to the counts: -- cgit v0.10.2 From 98c2aaf8be5baf7193be37fb28bce8e7327158bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 7 Apr 2009 11:30:17 +0200 Subject: x86, perfcounters: add atomic64_xchg() Complete atomic64_t support on the 32-bit side by adding atomic64_xch(). Cc: Peter Zijlstra LKML-Reference: <20090406094518.445450972@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 977250e..aff9f1f 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h @@ -291,19 +291,37 @@ atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, } /** - * atomic64_set - set atomic64 variable + * atomic64_xchg - xchg atomic64 variable * @ptr: pointer to type atomic64_t * @new_val: value to assign + * @old_val: old value that was there * - * Atomically sets the value of @ptr to @new_val. + * Atomically xchgs the value of @ptr to @new_val and returns + * the old value. */ -static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) + +static inline unsigned long long +atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) { unsigned long long old_val; do { old_val = atomic_read(ptr); } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); + + return old_val; +} + +/** + * atomic64_set - set atomic64 variable + * @ptr: pointer to type atomic64_t + * @new_val: value to assign + * + * Atomically sets the value of @ptr to @new_val. + */ +static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) +{ + atomic64_xchg(ptr, new_val); } /** -- cgit v0.10.2 From cac94f979326212831c0ea44ed9ea1622b4f4e93 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:33 +0200 Subject: x86, bts: fix race when bts tracer is removed When the bts tracer is removed while the traced task is running, the write to clear the bts tracer pointer races with context switch code. Read the tracer once during a context switch. When a new tracer is installed, the bts tracer is set in the ds context before the tracer is initialized in order to claim the context for that tracer. This may result in write accesses using an uninitialized trace configuration when scheduling timestamps have been requested. Store active tracing flags separately and only set active flags after the tracing configuration has been initialized. Reviewed-by: Oleg Nesterov Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144548.881338000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index b1d6e1f..c730155 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -89,6 +89,9 @@ struct bts_tracer { /* Buffer overflow notification function: */ bts_ovfl_callback_t ovfl; + + /* Active flags affecting trace collection. */ + unsigned int flags; }; struct pebs_tracer { @@ -799,6 +802,8 @@ void ds_suspend_bts(struct bts_tracer *tracer) if (!tracer) return; + tracer->flags = 0; + task = tracer->ds.context->task; if (!task || (task == current)) @@ -820,6 +825,8 @@ void ds_resume_bts(struct bts_tracer *tracer) if (!tracer) return; + tracer->flags = tracer->trace.ds.flags; + task = tracer->ds.context->task; control = ds_cfg.ctl[dsf_bts]; @@ -1037,43 +1044,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) } } +static inline void ds_take_timestamp(struct ds_context *context, + enum bts_qualifier qualifier, + struct task_struct *task) +{ + struct bts_tracer *tracer = context->bts_master; + struct bts_struct ts; + + /* Prevent compilers from reading the tracer pointer twice. */ + barrier(); + + if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) + return; + + memset(&ts, 0, sizeof(ts)); + ts.qualifier = qualifier; + ts.variant.timestamp.jiffies = jiffies_64; + ts.variant.timestamp.pid = task->pid; + + bts_write(tracer, &ts); +} + /* * Change the DS configuration from tracing prev to tracing next. */ void ds_switch_to(struct task_struct *prev, struct task_struct *next) { - struct ds_context *prev_ctx = prev->thread.ds_ctx; - struct ds_context *next_ctx = next->thread.ds_ctx; + struct ds_context *prev_ctx = prev->thread.ds_ctx; + struct ds_context *next_ctx = next->thread.ds_ctx; + unsigned long debugctlmsr = next->thread.debugctlmsr; + + /* Make sure all data is read before we start. */ + barrier(); if (prev_ctx) { update_debugctlmsr(0); - if (prev_ctx->bts_master && - (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { - struct bts_struct ts = { - .qualifier = bts_task_departs, - .variant.timestamp.jiffies = jiffies_64, - .variant.timestamp.pid = prev->pid - }; - bts_write(prev_ctx->bts_master, &ts); - } + ds_take_timestamp(prev_ctx, bts_task_departs, prev); } if (next_ctx) { - if (next_ctx->bts_master && - (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { - struct bts_struct ts = { - .qualifier = bts_task_arrives, - .variant.timestamp.jiffies = jiffies_64, - .variant.timestamp.pid = next->pid - }; - bts_write(next_ctx->bts_master, &ts); - } + ds_take_timestamp(next_ctx, bts_task_arrives, next); wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); } - update_debugctlmsr(next->thread.debugctlmsr); + update_debugctlmsr(debugctlmsr); } void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) -- cgit v0.10.2 From a26b89f05d194413c7238e0bea071054f6b5d3c8 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:34 +0200 Subject: sched, hw-branch-tracer: add wait_task_context_switch() function to sched.h Add a function to wait until some other task has been switched out at least once. This differs from wait_task_inactive() subtly, in that the latter will wait until the task has left the CPU. Signed-off-by: Markus Metzger Cc: markus.t.metzger@gmail.com Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144549.794157000@intel.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index b94f354..a5b9a83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1993,8 +1993,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP +extern void wait_task_context_switch(struct task_struct *p); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else +static inline void wait_task_context_switch(struct task_struct *p) {} static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state) { diff --git a/kernel/sched.c b/kernel/sched.c index 6cc1fd5..f91bc81 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2003,6 +2003,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) } /* + * wait_task_context_switch - wait for a thread to complete at least one + * context switch. + * + * @p must not be current. + */ +void wait_task_context_switch(struct task_struct *p) +{ + unsigned long nvcsw, nivcsw, flags; + int running; + struct rq *rq; + + nvcsw = p->nvcsw; + nivcsw = p->nivcsw; + for (;;) { + /* + * The runqueue is assigned before the actual context + * switch. We need to take the runqueue lock. + * + * We could check initially without the lock but it is + * very likely that we need to take the lock in every + * iteration. + */ + rq = task_rq_lock(p, &flags); + running = task_running(rq, p); + task_rq_unlock(rq, &flags); + + if (likely(!running)) + break; + /* + * The switch count is incremented before the actual + * context switch. We thus wait for two switches to be + * sure at least one completed. + */ + if ((p->nvcsw - nvcsw) > 1) + break; + if ((p->nivcsw - nivcsw) > 1) + break; + + cpu_relax(); + } +} + +/* * wait_task_inactive - wait for a thread to unschedule. * * If @match_state is nonzero, it's the @p->state value just checked and -- cgit v0.10.2 From e2b371f00a6f529f6362654239bdec8dcd510760 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:35 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping When a ptraced task is unlinked, we need to stop branch tracing for that task. Since the unlink is called with interrupts disabled, and we need interrupts enabled to stop branch tracing, we defer the work. Collect all branch tracing related stuff in a branch tracing context. Reviewed-by: Oleg Nesterov Signed-off-by: Markus Metzger Cc: Andrew Morton Cc: Peter Zijlstra Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144550.712401000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 34c5237..2483807 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -458,10 +458,6 @@ struct thread_struct { /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ struct ds_context *ds_ctx; #endif /* CONFIG_X86_DS */ -#ifdef CONFIG_X86_PTRACE_BTS -/* the signal to send on a bts buffer overflow */ - unsigned int bts_ovfl_signal; -#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fe9345c..7c21d1e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -577,17 +578,119 @@ static int ioperm_get(struct task_struct *target, } #ifdef CONFIG_X86_PTRACE_BTS +/* + * A branch trace store context. + * + * Contexts may only be installed by ptrace_bts_config() and only for + * ptraced tasks. + * + * Contexts are destroyed when the tracee is detached from the tracer. + * The actual destruction work requires interrupts enabled, so the + * work is deferred and will be scheduled during __ptrace_unlink(). + * + * Contexts hold an additional task_struct reference on the traced + * task, as well as a reference on the tracer's mm. + * + * Ptrace already holds a task_struct for the duration of ptrace operations, + * but since destruction is deferred, it may be executed after both + * tracer and tracee exited. + */ +struct bts_context { + /* The branch trace handle. */ + struct bts_tracer *tracer; + + /* The buffer used to store the branch trace and its size. */ + void *buffer; + unsigned int size; + + /* The mm that paid for the above buffer. */ + struct mm_struct *mm; + + /* The task this context belongs to. */ + struct task_struct *task; + + /* The signal to send on a bts buffer overflow. */ + unsigned int bts_ovfl_signal; + + /* The work struct to destroy a context. */ + struct work_struct work; +}; + +static inline void alloc_bts_buffer(struct bts_context *context, + unsigned int size) +{ + void *buffer; + + buffer = alloc_locked_buffer(size); + if (buffer) { + context->buffer = buffer; + context->size = size; + context->mm = get_task_mm(current); + } +} + +static inline void free_bts_buffer(struct bts_context *context) +{ + if (!context->buffer) + return; + + kfree(context->buffer); + context->buffer = NULL; + + refund_locked_buffer_memory(context->mm, context->size); + context->size = 0; + + mmput(context->mm); + context->mm = NULL; +} + +static void free_bts_context_work(struct work_struct *w) +{ + struct bts_context *context; + + context = container_of(w, struct bts_context, work); + + ds_release_bts(context->tracer); + put_task_struct(context->task); + free_bts_buffer(context); + kfree(context); +} + +static inline void free_bts_context(struct bts_context *context) +{ + INIT_WORK(&context->work, free_bts_context_work); + schedule_work(&context->work); +} + +static inline struct bts_context *alloc_bts_context(struct task_struct *task) +{ + struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); + if (context) { + context->task = task; + task->bts = context; + + get_task_struct(task); + } + + return context; +} + static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { + struct bts_context *context; const struct bts_trace *trace; struct bts_struct bts; const unsigned char *at; int error; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; at = trace->ds.top - ((index + 1) * trace->ds.size); if ((void *)at < trace->ds.begin) @@ -596,7 +699,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, if (!trace->read) return -EOPNOTSUPP; - error = trace->read(child->bts, at, &bts); + error = trace->read(context->tracer, at, &bts); if (error < 0) return error; @@ -610,13 +713,18 @@ static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { + struct bts_context *context; const struct bts_trace *trace; const unsigned char *at; int error, drained = 0; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; if (!trace->read) return -EOPNOTSUPP; @@ -627,9 +735,8 @@ static int ptrace_bts_drain(struct task_struct *child, for (at = trace->ds.begin; (void *)at < trace->ds.top; out++, drained++, at += trace->ds.size) { struct bts_struct bts; - int error; - error = trace->read(child->bts, at, &bts); + error = trace->read(context->tracer, at, &bts); if (error < 0) return error; @@ -639,35 +746,18 @@ static int ptrace_bts_drain(struct task_struct *child, memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - error = ds_reset_bts(child->bts); + error = ds_reset_bts(context->tracer); if (error < 0) return error; return drained; } -static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) -{ - child->bts_buffer = alloc_locked_buffer(size); - if (!child->bts_buffer) - return -ENOMEM; - - child->bts_size = size; - - return 0; -} - -static void ptrace_bts_free_buffer(struct task_struct *child) -{ - free_locked_buffer(child->bts_buffer, child->bts_size); - child->bts_buffer = NULL; - child->bts_size = 0; -} - static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { + struct bts_context *context; struct ptrace_bts_config cfg; unsigned int flags = 0; @@ -677,28 +767,31 @@ static int ptrace_bts_config(struct task_struct *child, if (copy_from_user(&cfg, ucfg, sizeof(cfg))) return -EFAULT; - if (child->bts) { - ds_release_bts(child->bts); - child->bts = NULL; - } + context = child->bts; + if (!context) + context = alloc_bts_context(child); + if (!context) + return -ENOMEM; if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (!cfg.signal) return -EINVAL; - child->thread.bts_ovfl_signal = cfg.signal; return -EOPNOTSUPP; + context->bts_ovfl_signal = cfg.signal; } - if ((cfg.flags & PTRACE_BTS_O_ALLOC) && - (cfg.size != child->bts_size)) { - int error; + ds_release_bts(context->tracer); + context->tracer = NULL; - ptrace_bts_free_buffer(child); + if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { + free_bts_buffer(context); + if (!cfg.size) + return 0; - error = ptrace_bts_allocate_buffer(child, cfg.size); - if (error < 0) - return error; + alloc_bts_buffer(context, cfg.size); + if (!context->buffer) + return -ENOMEM; } if (cfg.flags & PTRACE_BTS_O_TRACE) @@ -707,15 +800,13 @@ static int ptrace_bts_config(struct task_struct *child, if (cfg.flags & PTRACE_BTS_O_SCHED) flags |= BTS_TIMESTAMPS; - child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, - /* ovfl = */ NULL, /* th = */ (size_t)-1, - flags); - if (IS_ERR(child->bts)) { - int error = PTR_ERR(child->bts); - - ptrace_bts_free_buffer(child); - child->bts = NULL; + context->tracer = ds_request_bts(child, context->buffer, context->size, + NULL, (size_t)-1, flags); + if (unlikely(IS_ERR(context->tracer))) { + int error = PTR_ERR(context->tracer); + free_bts_buffer(context); + context->tracer = NULL; return error; } @@ -726,20 +817,25 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { + struct bts_context *context; const struct bts_trace *trace; struct ptrace_bts_config cfg; + context = child->bts; + if (!context) + return -ESRCH; + if (cfg_size < sizeof(cfg)) return -EIO; - trace = ds_read_bts(child->bts); + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; memset(&cfg, 0, sizeof(cfg)); - cfg.size = trace->ds.end - trace->ds.begin; - cfg.signal = child->thread.bts_ovfl_signal; - cfg.bts_size = sizeof(struct bts_struct); + cfg.size = trace->ds.end - trace->ds.begin; + cfg.signal = context->bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); if (cfg.signal) cfg.flags |= PTRACE_BTS_O_SIGNAL; @@ -758,67 +854,56 @@ static int ptrace_bts_status(struct task_struct *child, static int ptrace_bts_clear(struct task_struct *child) { + struct bts_context *context; const struct bts_trace *trace; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - return ds_reset_bts(child->bts); + return ds_reset_bts(context->tracer); } static int ptrace_bts_size(struct task_struct *child) { + struct bts_context *context; const struct bts_trace *trace; - trace = ds_read_bts(child->bts); + context = child->bts; + if (!context) + return -ESRCH; + + trace = ds_read_bts(context->tracer); if (!trace) - return -EPERM; + return -ESRCH; return (trace->ds.top - trace->ds.begin) / trace->ds.size; } -static void ptrace_bts_fork(struct task_struct *tsk) +static inline void ptrace_bts_fork(struct task_struct *tsk) { tsk->bts = NULL; - tsk->bts_buffer = NULL; - tsk->bts_size = 0; - tsk->thread.bts_ovfl_signal = 0; } -static void ptrace_bts_untrace(struct task_struct *child) +/* + * Called from __ptrace_unlink() after the child has been moved back + * to its original parent. + */ +static inline void ptrace_bts_untrace(struct task_struct *child) { if (unlikely(child->bts)) { - ds_release_bts(child->bts); + free_bts_context(child->bts); child->bts = NULL; - - /* We cannot update total_vm and locked_vm since - child's mm is already gone. But we can reclaim the - memory. */ - kfree(child->bts_buffer); - child->bts_buffer = NULL; - child->bts_size = 0; } } - -static void ptrace_bts_detach(struct task_struct *child) -{ - /* - * Ptrace_detach() races with ptrace_untrace() in case - * the child dies and is reaped by another thread. - * - * We only do the memory accounting at this point and - * leave the buffer deallocation and the bts tracer - * release to ptrace_bts_untrace() which will be called - * later on with tasklist_lock held. - */ - release_locked_buffer(child->bts_buffer, child->bts_size); -} #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} -static inline void ptrace_bts_detach(struct task_struct *child) {} static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ @@ -843,7 +928,6 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - ptrace_bts_detach(child); } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d..64d8ed2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -13,6 +13,7 @@ #include #include #include +#include struct mempolicy; struct anon_vma; @@ -1321,6 +1322,6 @@ void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); extern void free_locked_buffer(void *buffer, size_t size); -extern void release_locked_buffer(void *buffer, size_t size); +extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a5b9a83..52b8cd0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -96,8 +96,8 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; -struct bts_tracer; struct fs_struct; +struct bts_context; /* * List of flags we want to share for kernel threads, @@ -1210,12 +1210,7 @@ struct task_struct { * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ - struct bts_tracer *bts; - /* - * The buffer to hold the BTS data. - */ - void *bts_buffer; - size_t bts_size; + struct bts_context *bts; #endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ diff --git a/mm/mlock.c b/mm/mlock.c index cbe9e05..749383b 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -660,21 +660,20 @@ void *alloc_locked_buffer(size_t size) return buffer; } -void release_locked_buffer(void *buffer, size_t size) +void refund_locked_buffer_memory(struct mm_struct *mm, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); - current->mm->total_vm -= pgsz; - current->mm->locked_vm -= pgsz; + mm->total_vm -= pgsz; + mm->locked_vm -= pgsz; - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); } void free_locked_buffer(void *buffer, size_t size) { - release_locked_buffer(buffer, size); - + refund_locked_buffer_memory(current->mm, size); kfree(buffer); } -- cgit v0.10.2 From 8d99b3ac2726e5edd97ad147fa5c1f2acb63a745 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:36 +0200 Subject: x86, bts: wait until traced task has been scheduled out In order to stop branch tracing for a running task, we need to first clear the branch tracing control bits before we may free the tracing buffer. If the traced task is running, the cpu might still trace that task after the branch trace control bits have cleared. Wait until the traced task has been scheduled out before proceeding. A similar problem affects the task debug store context. We first remove the context, then we need to wait until the task has been scheduled out before we can free the context memory. Reviewed-by: Oleg Nesterov Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144551.919636000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index c730155..5cd137a 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -299,6 +299,7 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) static inline void ds_put_context(struct ds_context *context) { + struct task_struct *task; unsigned long irq; if (!context) @@ -313,14 +314,20 @@ static inline void ds_put_context(struct ds_context *context) *(context->this) = NULL; - if (context->task) - clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + task = context->task; + + if (task) + clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); - if (!context->task || (context->task == current)) + if (!task || (task == current)) wrmsrl(MSR_IA32_DS_AREA, 0); spin_unlock_irqrestore(&ds_lock, irq); + /* The context might still be in use for context switching. */ + if (task && (task != current)) + wait_task_context_switch(task); + kfree(context); } @@ -781,15 +788,23 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, void ds_release_bts(struct bts_tracer *tracer) { + struct task_struct *task; + if (!tracer) return; + task = tracer->ds.context->task; + ds_suspend_bts(tracer); WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); tracer->ds.context->bts_master = NULL; - put_tracer(tracer->ds.context->task); + /* Make sure tracing stopped and the tracer is not in use. */ + if (task && (task != current)) + wait_task_context_switch(task); + + put_tracer(task); ds_put_context(tracer->ds.context); kfree(tracer); -- cgit v0.10.2 From 38f801129ad07b9afa7f9bd3779f61b805416d8c Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:37 +0200 Subject: x86, bts: fix race between per-task and per-cpu branch tracing Per-task branch tracing installs a debug store context with the traced task. This immediately results in the branch trace control bits to be cleared for the next context switch of that task, if not set before. Either per-cpu or per-task tracing are allowed at the same time. An active per-cpu tracing would be disabled even if the per-task tracing request is rejected and the task debug store context removed. Check the tracing type (per-cpu or per-task) before installing a task debug store context. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144552.856000000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 5cd137a..f03f117 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -193,12 +193,28 @@ static DEFINE_SPINLOCK(ds_lock); */ static atomic_t tracers = ATOMIC_INIT(0); -static inline void get_tracer(struct task_struct *task) +static inline int get_tracer(struct task_struct *task) { - if (task) + int error; + + spin_lock_irq(&ds_lock); + + if (task) { + error = -EPERM; + if (atomic_read(&tracers) < 0) + goto out; atomic_inc(&tracers); - else + } else { + error = -EPERM; + if (atomic_read(&tracers) > 0) + goto out; atomic_dec(&tracers); + } + + error = 0; +out: + spin_unlock_irq(&ds_lock); + return error; } static inline void put_tracer(struct task_struct *task) @@ -209,14 +225,6 @@ static inline void put_tracer(struct task_struct *task) atomic_inc(&tracers); } -static inline int check_tracer(struct task_struct *task) -{ - return task ? - (atomic_read(&tracers) >= 0) : - (atomic_read(&tracers) <= 0); -} - - /* * The DS context is either attached to a thread or to a cpu: * - in the former case, the thread_struct contains a pointer to the @@ -677,6 +685,10 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, if (ovfl) goto out; + error = get_tracer(task); + if (error < 0) + goto out; + /* * Per-cpu tracing is typically requested using smp_call_function(). * We must not sleep. @@ -684,7 +696,7 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, error = -ENOMEM; tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); if (!tracer) - goto out; + goto out_put_tracer; tracer->ovfl = ovfl; error = ds_request(&tracer->ds, &tracer->trace.ds, @@ -696,13 +708,8 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, spin_lock_irqsave(&ds_lock, irq); error = -EPERM; - if (!check_tracer(task)) - goto out_unlock; - get_tracer(task); - - error = -EPERM; if (tracer->ds.context->bts_master) - goto out_put_tracer; + goto out_unlock; tracer->ds.context->bts_master = tracer; spin_unlock_irqrestore(&ds_lock, irq); @@ -716,13 +723,13 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, return tracer; - out_put_tracer: - put_tracer(task); out_unlock: spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); + out_put_tracer: + put_tracer(task); out: return ERR_PTR(error); } @@ -741,6 +748,10 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, if (ovfl) goto out; + error = get_tracer(task); + if (error < 0) + goto out; + /* * Per-cpu tracing is typically requested using smp_call_function(). * We must not sleep. @@ -748,7 +759,7 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, error = -ENOMEM; tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); if (!tracer) - goto out; + goto out_put_tracer; tracer->ovfl = ovfl; error = ds_request(&tracer->ds, &tracer->trace.ds, @@ -759,13 +770,8 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, spin_lock_irqsave(&ds_lock, irq); error = -EPERM; - if (!check_tracer(task)) - goto out_unlock; - get_tracer(task); - - error = -EPERM; if (tracer->ds.context->pebs_master) - goto out_put_tracer; + goto out_unlock; tracer->ds.context->pebs_master = tracer; spin_unlock_irqrestore(&ds_lock, irq); @@ -775,13 +781,13 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, return tracer; - out_put_tracer: - put_tracer(task); out_unlock: spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); + out_put_tracer: + put_tracer(task); out: return ERR_PTR(error); } @@ -804,8 +810,8 @@ void ds_release_bts(struct bts_tracer *tracer) if (task && (task != current)) wait_task_context_switch(task); - put_tracer(task); ds_put_context(tracer->ds.context); + put_tracer(task); kfree(tracer); } @@ -861,16 +867,20 @@ void ds_resume_bts(struct bts_tracer *tracer) void ds_release_pebs(struct pebs_tracer *tracer) { + struct task_struct *task; + if (!tracer) return; + task = tracer->ds.context->task; + ds_suspend_pebs(tracer); WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); tracer->ds.context->pebs_master = NULL; - put_tracer(tracer->ds.context->task); ds_put_context(tracer->ds.context); + put_tracer(task); kfree(tracer); } -- cgit v0.10.2 From 15879d042164650b93d83281ad5f87ad323bfbfe Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:38 +0200 Subject: x86, bts: use trace_clock_global() for timestamps Rename the bts_struct timestamp field to event. Use trace_clock_global() for time measurement. Reported-by: Ingo Molnar Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144553.773216000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a8f672b..772f141 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -170,9 +170,9 @@ struct bts_struct { } lbr; /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ struct { - __u64 jiffies; + __u64 clock; pid_t pid; - } timestamp; + } event; } variant; }; diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index f03f117..2071b99 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -471,7 +472,7 @@ enum bts_field { bts_flags, bts_qual = bts_from, - bts_jiffies = bts_to, + bts_clock = bts_to, bts_pid = bts_flags, bts_qual_mask = (bts_qual_max - 1), @@ -517,8 +518,8 @@ bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) memset(out, 0, sizeof(*out)); if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); - out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); - out->variant.timestamp.pid = bts_get(at, bts_pid); + out->variant.event.clock = bts_get(at, bts_clock); + out->variant.event.pid = bts_get(at, bts_pid); } else { out->qualifier = bts_branch; out->variant.lbr.from = bts_get(at, bts_from); @@ -555,8 +556,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) case bts_task_arrives: case bts_task_departs: bts_set(raw, bts_qual, (bts_escape | in->qualifier)); - bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); - bts_set(raw, bts_pid, in->variant.timestamp.pid); + bts_set(raw, bts_clock, in->variant.event.clock); + bts_set(raw, bts_pid, in->variant.event.pid); break; default: return -EINVAL; @@ -1083,9 +1084,9 @@ static inline void ds_take_timestamp(struct ds_context *context, return; memset(&ts, 0, sizeof(ts)); - ts.qualifier = qualifier; - ts.variant.timestamp.jiffies = jiffies_64; - ts.variant.timestamp.pid = task->pid; + ts.qualifier = qualifier; + ts.variant.event.clock = trace_clock_global(); + ts.variant.event.pid = task->pid; bts_write(tracer, &ts); } -- cgit v0.10.2 From 35bb7600c17762bb129588c1877d2717fe325289 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:39 +0200 Subject: x86, debugctlmsr: add _on_cpu variants to debugctlmsr functions Add functions to get and set the debugctlmsr on different cpus. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144554.738772000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2483807..1efeb49 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -785,6 +785,21 @@ static inline unsigned long get_debugctlmsr(void) return debugctlmsr; } +static inline unsigned long get_debugctlmsr_on_cpu(int cpu) +{ + u64 debugctlmsr = 0; + u32 val1, val2; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); + debugctlmsr = val1 | ((u64)val2 << 32); + + return debugctlmsr; +} + static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR @@ -794,6 +809,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } +static inline void update_debugctlmsr_on_cpu(int cpu, + unsigned long debugctlmsr) +{ +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return; +#endif + wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, + (u32)((u64)debugctlmsr), + (u32)((u64)debugctlmsr >> 32)); +} + /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: -- cgit v0.10.2 From de79f54f5347ad7ec6ff55ccbb6d4ab2a21f6a93 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:40 +0200 Subject: x86, bts, hw-branch-tracer: add _noirq variants to the debug store interface The hw-branch-tracer uses debug store functions from an on_each_cpu() context, which is simply wrong since the functions may sleep. Add _noirq variants for most functions, which may be called with interrupts disabled. Separate per-cpu and per-task tracing and allow per-cpu tracing to be controlled from any cpu. Make the hw-branch-tracer use the new debug store interface, synchronize with hotplug cpu event using get/put_online_cpus(), and remove the unnecessary spinlock. Make the ptrace bts and the ds selftest code use the new interface. Defer the ds selftest. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144555.658136000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 772f141..413e127 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger , 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ #ifndef _ASM_X86_DS_H @@ -83,8 +83,10 @@ enum ds_feature { * The interrupt threshold is independent from the overflow callback * to allow users to use their own overflow interrupt handling mechanism. * - * task: the task to request recording for; - * NULL for per-cpu recording on the current cpu + * The function might sleep. + * + * task: the task to request recording for + * cpu: the cpu to request recording for * base: the base pointer for the (non-pageable) buffer; * size: the size of the provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; @@ -93,19 +95,28 @@ enum ds_feature { * -1 if no interrupt threshold is requested. * flags: a bit-mask of the above flags */ -extern struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); /* * Release BTS or PEBS resources * Suspend and resume BTS or PEBS tracing * + * Must be called with irq's enabled. + * * tracer: the tracer handle returned from ds_request_~() */ extern void ds_release_bts(struct bts_tracer *tracer); @@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); extern void ds_suspend_pebs(struct pebs_tracer *tracer); extern void ds_resume_pebs(struct pebs_tracer *tracer); +/* + * Release BTS or PEBS resources + * Suspend and resume BTS or PEBS tracing + * + * Cpu tracers must call this on the traced cpu. + * Task tracers must call ds_release_~_noirq() for themselves. + * + * May be called with irq's disabled. + * + * Returns 0 if successful; + * -EPERM if the cpu tracer does not trace the current cpu. + * -EPERM if the task tracer does not trace itself. + * + * tracer: the tracer handle returned from ds_request_~() + */ +extern int ds_release_bts_noirq(struct bts_tracer *tracer); +extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); +extern int ds_resume_bts_noirq(struct bts_tracer *tracer); +extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); + /* * The raw DS buffer state as it is used for BTS and PEBS recording. diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 2071b99..21a3852 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -245,60 +245,50 @@ struct ds_context { struct pebs_tracer *pebs_master; /* Use count: */ - unsigned long count; + unsigned long count; /* Pointer to the context pointer field: */ struct ds_context **this; - /* The traced task; NULL for current cpu: */ + /* The traced task; NULL for cpu tracing: */ struct task_struct *task; -}; -static DEFINE_PER_CPU(struct ds_context *, system_context_array); + /* The traced cpu; only valid if task is NULL: */ + int cpu; +}; -#define system_context per_cpu(system_context_array, smp_processor_id()) +static DEFINE_PER_CPU(struct ds_context *, cpu_context); -static inline struct ds_context *ds_get_context(struct task_struct *task) +static struct ds_context *ds_get_context(struct task_struct *task, int cpu) { struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &system_context); + (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); struct ds_context *context = NULL; struct ds_context *new_context = NULL; - unsigned long irq; - /* - * Chances are small that we already have a context. - * - * Contexts for per-cpu tracing are allocated using - * smp_call_function(). We must not sleep. - */ - new_context = kzalloc(sizeof(*new_context), GFP_ATOMIC); + /* Chances are small that we already have a context. */ + new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); if (!new_context) return NULL; - spin_lock_irqsave(&ds_lock, irq); + spin_lock_irq(&ds_lock); context = *p_context; - if (!context) { + if (likely(!context)) { context = new_context; context->this = p_context; context->task = task; + context->cpu = cpu; context->count = 0; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - - if (!task || (task == current)) - wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); - *p_context = context; } context->count++; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); if (context != new_context) kfree(new_context); @@ -306,7 +296,7 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) return context; } -static inline void ds_put_context(struct ds_context *context) +static void ds_put_context(struct ds_context *context) { struct task_struct *task; unsigned long irq; @@ -328,8 +318,15 @@ static inline void ds_put_context(struct ds_context *context) if (task) clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); - if (!task || (task == current)) - wrmsrl(MSR_IA32_DS_AREA, 0); + /* + * We leave the (now dangling) pointer to the DS configuration in + * the DS_AREA msr. This is as good or as bad as replacing it with + * NULL - the hardware would crash if we enabled tracing. + * + * This saves us some problems with having to write an msr on a + * different cpu while preventing others from doing the same for the + * next context for that same cpu. + */ spin_unlock_irqrestore(&ds_lock, irq); @@ -340,6 +337,31 @@ static inline void ds_put_context(struct ds_context *context) kfree(context); } +static void ds_install_ds_area(struct ds_context *context) +{ + unsigned long ds; + + ds = (unsigned long)context->ds; + + /* + * There is a race between the bts master and the pebs master. + * + * The thread/cpu access is synchronized via get/put_cpu() for + * task tracing and via wrmsr_on_cpu for cpu tracing. + * + * If bts and pebs are collected for the same task or same cpu, + * the same confiuration is written twice. + */ + if (context->task) { + get_cpu(); + if (context->task == current) + wrmsrl(MSR_IA32_DS_AREA, ds); + set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + put_cpu(); + } else + wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, + (u32)((u64)ds), (u32)((u64)ds >> 32)); +} /* * Call the tracer's callback on a buffer overflow. @@ -622,6 +644,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, * The value for 'no threshold' is -1, which will set the * threshold outside of the buffer, just like we want it. */ + ith *= ds_cfg.sizeof_rec[qual]; trace->ith = (void *)(buffer + size - ith); trace->flags = flags; @@ -630,7 +653,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, enum ds_qualifier qual, struct task_struct *task, - void *base, size_t size, size_t th, unsigned int flags) + int cpu, void *base, size_t size, size_t th) { struct ds_context *context; int error; @@ -643,7 +666,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, if (!base) goto out; - /* We require some space to do alignment adjustments below. */ + /* We need space for alignment adjustments in ds_init_ds_trace(). */ error = -EINVAL; if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) goto out; @@ -660,25 +683,27 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, tracer->size = size; error = -ENOMEM; - context = ds_get_context(task); + context = ds_get_context(task, cpu); if (!context) goto out; tracer->context = context; - ds_init_ds_trace(trace, qual, base, size, th, flags); + /* + * Defer any tracer-specific initialization work for the context until + * context ownership has been clarified. + */ error = 0; out: return error; } -struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th, - unsigned int flags) +static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct bts_tracer *tracer; - unsigned long irq; int error; /* Buffer overflow notification is not yet implemented. */ @@ -690,42 +715,46 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, if (error < 0) goto out; - /* - * Per-cpu tracing is typically requested using smp_call_function(). - * We must not sleep. - */ error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); if (!tracer) goto out_put_tracer; tracer->ovfl = ovfl; + /* Do some more error checking and acquire a tracing context. */ error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_bts, task, base, size, th, flags); + ds_bts, task, cpu, base, size, th); if (error < 0) goto out_tracer; - - spin_lock_irqsave(&ds_lock, irq); + /* Claim the bts part of the tracing context we acquired above. */ + spin_lock_irq(&ds_lock); error = -EPERM; if (tracer->ds.context->bts_master) goto out_unlock; tracer->ds.context->bts_master = tracer; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); + /* + * Now that we own the bts part of the context, let's complete the + * initialization for that part. + */ + ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_install_ds_area(tracer->ds.context); tracer->trace.read = bts_read; tracer->trace.write = bts_write; - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + /* Start tracing. */ ds_resume_bts(tracer); return tracer; out_unlock: - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); @@ -735,13 +764,27 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, return ERR_PTR(error); } -struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, size_t th, - unsigned int flags) +struct bts_tracer *ds_request_bts_task(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_bts(task, 0, base, size, ovfl, th, flags); +} + +struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); +} + +static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct pebs_tracer *tracer; - unsigned long irq; int error; /* Buffer overflow notification is not yet implemented. */ @@ -753,37 +796,43 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, if (error < 0) goto out; - /* - * Per-cpu tracing is typically requested using smp_call_function(). - * We must not sleep. - */ error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); if (!tracer) goto out_put_tracer; tracer->ovfl = ovfl; + /* Do some more error checking and acquire a tracing context. */ error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_pebs, task, base, size, th, flags); + ds_pebs, task, cpu, base, size, th); if (error < 0) goto out_tracer; - spin_lock_irqsave(&ds_lock, irq); + /* Claim the pebs part of the tracing context we acquired above. */ + spin_lock_irq(&ds_lock); error = -EPERM; if (tracer->ds.context->pebs_master) goto out_unlock; tracer->ds.context->pebs_master = tracer; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); + /* + * Now that we own the pebs part of the context, let's complete the + * initialization for that part. + */ + ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); + ds_install_ds_area(tracer->ds.context); + + /* Start tracing. */ ds_resume_pebs(tracer); return tracer; out_unlock: - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); @@ -793,16 +842,26 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, return ERR_PTR(error); } -void ds_release_bts(struct bts_tracer *tracer) +struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags) { - struct task_struct *task; + return ds_request_pebs(task, 0, base, size, ovfl, th, flags); +} - if (!tracer) - return; +struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); +} - task = tracer->ds.context->task; +static void ds_free_bts(struct bts_tracer *tracer) +{ + struct task_struct *task; - ds_suspend_bts(tracer); + task = tracer->ds.context->task; WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); tracer->ds.context->bts_master = NULL; @@ -817,9 +876,69 @@ void ds_release_bts(struct bts_tracer *tracer) kfree(tracer); } +void ds_release_bts(struct bts_tracer *tracer) +{ + might_sleep(); + + if (!tracer) + return; + + ds_suspend_bts(tracer); + ds_free_bts(tracer); +} + +int ds_release_bts_noirq(struct bts_tracer *tracer) +{ + struct task_struct *task; + unsigned long irq; + int error; + + if (!tracer) + return 0; + + task = tracer->ds.context->task; + + local_irq_save(irq); + + error = -EPERM; + if (!task && + (tracer->ds.context->cpu != smp_processor_id())) + goto out; + + error = -EPERM; + if (task && (task != current)) + goto out; + + ds_suspend_bts_noirq(tracer); + ds_free_bts(tracer); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static void update_task_debugctlmsr(struct task_struct *task, + unsigned long debugctlmsr) +{ + task->thread.debugctlmsr = debugctlmsr; + + get_cpu(); + if (task == current) + update_debugctlmsr(debugctlmsr); + + if (task->thread.debugctlmsr) + set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + else + clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + put_cpu(); +} + void ds_suspend_bts(struct bts_tracer *tracer) { struct task_struct *task; + unsigned long debugctlmsr; + int cpu; if (!tracer) return; @@ -827,29 +946,60 @@ void ds_suspend_bts(struct bts_tracer *tracer) tracer->flags = 0; task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; - if (!task || (task == current)) - update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); + WARN_ON(!task && irqs_disabled()); - if (task) { - task->thread.debugctlmsr &= ~BTS_CONTROL; + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr_on_cpu(cpu)); + debugctlmsr &= ~BTS_CONTROL; - if (!task->thread.debugctlmsr) - clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); - } + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr_on_cpu(cpu, debugctlmsr); } -void ds_resume_bts(struct bts_tracer *tracer) +int ds_suspend_bts_noirq(struct bts_tracer *tracer) { struct task_struct *task; - unsigned long control; + unsigned long debugctlmsr, irq; + int cpu, error = 0; if (!tracer) - return; + return 0; - tracer->flags = tracer->trace.ds.flags; + tracer->flags = 0; task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; + + local_irq_save(irq); + + error = -EPERM; + if (!task && (cpu != smp_processor_id())) + goto out; + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr()); + debugctlmsr &= ~BTS_CONTROL; + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr(debugctlmsr); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static unsigned long ds_bts_control(struct bts_tracer *tracer) +{ + unsigned long control; control = ds_cfg.ctl[dsf_bts]; if (!(tracer->trace.ds.flags & BTS_KERNEL)) @@ -857,25 +1007,77 @@ void ds_resume_bts(struct bts_tracer *tracer) if (!(tracer->trace.ds.flags & BTS_USER)) control |= ds_cfg.ctl[dsf_bts_user]; - if (task) { - task->thread.debugctlmsr |= control; - set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); - } - - if (!task || (task == current)) - update_debugctlmsr(get_debugctlmsr() | control); + return control; } -void ds_release_pebs(struct pebs_tracer *tracer) +void ds_resume_bts(struct bts_tracer *tracer) { struct task_struct *task; + unsigned long debugctlmsr; + int cpu; if (!tracer) return; + tracer->flags = tracer->trace.ds.flags; + task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; - ds_suspend_pebs(tracer); + WARN_ON(!task && irqs_disabled()); + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr_on_cpu(cpu)); + debugctlmsr |= ds_bts_control(tracer); + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr_on_cpu(cpu, debugctlmsr); +} + +int ds_resume_bts_noirq(struct bts_tracer *tracer) +{ + struct task_struct *task; + unsigned long debugctlmsr, irq; + int cpu, error = 0; + + if (!tracer) + return 0; + + tracer->flags = tracer->trace.ds.flags; + + task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; + + local_irq_save(irq); + + error = -EPERM; + if (!task && (cpu != smp_processor_id())) + goto out; + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr()); + debugctlmsr |= ds_bts_control(tracer); + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr(debugctlmsr); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static void ds_free_pebs(struct pebs_tracer *tracer) +{ + struct task_struct *task; + + task = tracer->ds.context->task; WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); tracer->ds.context->pebs_master = NULL; @@ -886,16 +1088,68 @@ void ds_release_pebs(struct pebs_tracer *tracer) kfree(tracer); } +void ds_release_pebs(struct pebs_tracer *tracer) +{ + might_sleep(); + + if (!tracer) + return; + + ds_suspend_pebs(tracer); + ds_free_pebs(tracer); +} + +int ds_release_pebs_noirq(struct pebs_tracer *tracer) +{ + struct task_struct *task; + unsigned long irq; + int error; + + if (!tracer) + return 0; + + task = tracer->ds.context->task; + + local_irq_save(irq); + + error = -EPERM; + if (!task && + (tracer->ds.context->cpu != smp_processor_id())) + goto out; + + error = -EPERM; + if (task && (task != current)) + goto out; + + ds_suspend_pebs_noirq(tracer); + ds_free_pebs(tracer); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + void ds_suspend_pebs(struct pebs_tracer *tracer) { } +int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) +{ + return 0; +} + void ds_resume_pebs(struct pebs_tracer *tracer) { } +int ds_resume_pebs_noirq(struct pebs_tracer *tracer) +{ + return 0; +} + const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) { if (!tracer) @@ -1004,26 +1258,6 @@ ds_configure(const struct ds_configuration *cfg, printk(KERN_INFO "[ds] pebs not available\n"); } - if (ds_cfg.sizeof_rec[ds_bts]) { - int error; - - error = ds_selftest_bts(); - if (error) { - WARN(1, "[ds] selftest failed. disabling bts.\n"); - ds_cfg.sizeof_rec[ds_bts] = 0; - } - } - - if (ds_cfg.sizeof_rec[ds_pebs]) { - int error; - - error = ds_selftest_pebs(); - if (error) { - WARN(1, "[ds] selftest failed. disabling pebs.\n"); - ds_cfg.sizeof_rec[ds_pebs] = 0; - } - } - printk(KERN_INFO "[ds] sizes: address: %u bit, ", 8 * ds_cfg.sizeof_ptr_field); printk("bts/pebs record: %u/%u bytes\n", @@ -1127,3 +1361,29 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) void ds_exit_thread(struct task_struct *tsk) { } + +static __init int ds_selftest(void) +{ + if (ds_cfg.sizeof_rec[ds_bts]) { + int error; + + error = ds_selftest_bts(); + if (error) { + WARN(1, "[ds] selftest failed. disabling bts.\n"); + ds_cfg.sizeof_rec[ds_bts] = 0; + } + } + + if (ds_cfg.sizeof_rec[ds_pebs]) { + int error; + + error = ds_selftest_pebs(); + if (error) { + WARN(1, "[ds] selftest failed. disabling pebs.\n"); + ds_cfg.sizeof_rec[ds_pebs] = 0; + } + } + + return 0; +} +device_initcall(ds_selftest); diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index 8c46fbf..e5a263c 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -10,11 +10,12 @@ #include #include +#include #include -#define DS_SELFTEST_BUFFER_SIZE 1021 /* Intentionally chose an odd size. */ +#define BUFFER_SIZE 1021 /* Intentionally chose an odd size. */ static int ds_selftest_bts_consistency(const struct bts_trace *trace) @@ -125,12 +126,12 @@ int ds_selftest_bts(void) struct bts_tracer *tracer; int error = 0; void *top; - unsigned char buffer[DS_SELFTEST_BUFFER_SIZE]; + unsigned char buffer[BUFFER_SIZE]; printk(KERN_INFO "[ds] bts selftest..."); - tracer = ds_request_bts(NULL, buffer, DS_SELFTEST_BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); + tracer = ds_request_bts_cpu(smp_processor_id(), buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); if (IS_ERR(tracer)) { error = PTR_ERR(tracer); tracer = NULL; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7c21d1e..adbb243 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -800,8 +800,9 @@ static int ptrace_bts_config(struct task_struct *child, if (cfg.flags & PTRACE_BTS_O_SCHED) flags |= BTS_TIMESTAMPS; - context->tracer = ds_request_bts(child, context->buffer, context->size, - NULL, (size_t)-1, flags); + context->tracer = + ds_request_bts_task(child, context->buffer, context->size, + NULL, (size_t)-1, flags); if (unlikely(IS_ERR(context->tracer))) { int error = PTR_ERR(context->tracer); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 8b2109a..50565d8 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -4,7 +4,6 @@ * Copyright (C) 2008-2009 Intel Corporation. * Markus Metzger , 2008-2009 */ -#include #include #include #include @@ -21,168 +20,113 @@ #define BTS_BUFFER_SIZE (1 << 13) -/* - * The tracer lock protects the below per-cpu tracer array. - * It needs to be held to: - * - start tracing on all cpus - * - stop tracing on all cpus - * - start tracing on a single hotplug cpu - * - stop tracing on a single hotplug cpu - * - read the trace from all cpus - * - read the trace from a single cpu - */ -static DEFINE_SPINLOCK(bts_tracer_lock); static DEFINE_PER_CPU(struct bts_tracer *, tracer); static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); #define this_tracer per_cpu(tracer, smp_processor_id()) -#define this_buffer per_cpu(buffer, smp_processor_id()) static int trace_hw_branches_enabled __read_mostly; static int trace_hw_branches_suspended __read_mostly; static struct trace_array *hw_branch_trace __read_mostly; -/* - * Initialize the tracer for the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_init_cpu(void *arg) +static void bts_trace_init_cpu(int cpu) { - if (this_tracer) - ds_release_bts(this_tracer); + per_cpu(tracer, cpu) = + ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); - this_tracer = ds_request_bts(NULL, this_buffer, BTS_BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); - if (IS_ERR(this_tracer)) { - this_tracer = NULL; - return; - } + if (IS_ERR(per_cpu(tracer, cpu))) + per_cpu(tracer, cpu) = NULL; } static int bts_trace_init(struct trace_array *tr) { - int cpu, avail; - - spin_lock(&bts_tracer_lock); + int cpu; hw_branch_trace = tr; + trace_hw_branches_enabled = 0; - on_each_cpu(bts_trace_init_cpu, NULL, 1); - - /* Check on how many cpus we could enable tracing */ - avail = 0; - for_each_online_cpu(cpu) - if (per_cpu(tracer, cpu)) - avail++; + get_online_cpus(); + for_each_online_cpu(cpu) { + bts_trace_init_cpu(cpu); - trace_hw_branches_enabled = (avail ? 1 : 0); + if (likely(per_cpu(tracer, cpu))) + trace_hw_branches_enabled = 1; + } trace_hw_branches_suspended = 0; - - spin_unlock(&bts_tracer_lock); - + put_online_cpus(); /* If we could not enable tracing on a single cpu, we fail. */ - return avail ? 0 : -EOPNOTSUPP; -} - -/* - * Release the tracer for the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_release_cpu(void *arg) -{ - if (this_tracer) { - ds_release_bts(this_tracer); - this_tracer = NULL; - } + return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP; } static void bts_trace_reset(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; - on_each_cpu(bts_trace_release_cpu, NULL, 1); + get_online_cpus(); + for_each_online_cpu(cpu) { + if (likely(per_cpu(tracer, cpu))) { + ds_release_bts(per_cpu(tracer, cpu)); + per_cpu(tracer, cpu) = NULL; + } + } trace_hw_branches_enabled = 0; trace_hw_branches_suspended = 0; - - spin_unlock(&bts_tracer_lock); -} - -/* - * Resume tracing on the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_resume_cpu(void *arg) -{ - if (this_tracer) - ds_resume_bts(this_tracer); + put_online_cpus(); } static void bts_trace_start(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; - on_each_cpu(bts_trace_resume_cpu, NULL, 1); + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_resume_bts(per_cpu(tracer, cpu)); trace_hw_branches_suspended = 0; - - spin_unlock(&bts_tracer_lock); -} - -/* - * Suspend tracing on the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_suspend_cpu(void *arg) -{ - if (this_tracer) - ds_suspend_bts(this_tracer); + put_online_cpus(); } static void bts_trace_stop(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; - on_each_cpu(bts_trace_suspend_cpu, NULL, 1); + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); trace_hw_branches_suspended = 1; - - spin_unlock(&bts_tracer_lock); + put_online_cpus(); } static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; - - spin_lock(&bts_tracer_lock); - - if (!trace_hw_branches_enabled) - goto out; + int cpu = (long)hcpu; switch (action) { case CPU_ONLINE: case CPU_DOWN_FAILED: - smp_call_function_single(cpu, bts_trace_init_cpu, NULL, 1); - - if (trace_hw_branches_suspended) - smp_call_function_single(cpu, bts_trace_suspend_cpu, - NULL, 1); + /* The notification is sent with interrupts enabled. */ + if (trace_hw_branches_enabled) { + bts_trace_init_cpu(cpu); + + if (trace_hw_branches_suspended && + likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); + } break; + case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, bts_trace_release_cpu, NULL, 1); - break; + /* The notification is sent with interrupts enabled. */ + if (likely(per_cpu(tracer, cpu))) { + ds_release_bts(per_cpu(tracer, cpu)); + per_cpu(tracer, cpu) = NULL; + } } - out: - spin_unlock(&bts_tracer_lock); return NOTIFY_DONE; } @@ -274,7 +218,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) /* * Collect the trace on the current cpu and write it into the ftrace buffer. * - * pre: bts_tracer_lock must be locked + * pre: tracing must be suspended on the current cpu */ static void trace_bts_cpu(void *arg) { @@ -291,10 +235,9 @@ static void trace_bts_cpu(void *arg) if (unlikely(!this_tracer)) return; - ds_suspend_bts(this_tracer); trace = ds_read_bts(this_tracer); if (!trace) - goto out; + return; for (at = trace->ds.top; (void *)at < trace->ds.end; at += trace->ds.size) @@ -303,18 +246,27 @@ static void trace_bts_cpu(void *arg) for (at = trace->ds.begin; (void *)at < trace->ds.top; at += trace->ds.size) trace_bts_at(trace, at); - -out: - ds_resume_bts(this_tracer); } static void trace_bts_prepare(struct trace_iterator *iter) { - spin_lock(&bts_tracer_lock); + int cpu; + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); + /* + * We need to collect the trace on the respective cpu since ftrace + * implicitly adds the record for the current cpu. + * Once that is more flexible, we could collect the data from any cpu. + */ on_each_cpu(trace_bts_cpu, iter->tr, 1); - spin_unlock(&bts_tracer_lock); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_resume_bts(per_cpu(tracer, cpu)); + put_online_cpus(); } static void trace_bts_close(struct trace_iterator *iter) @@ -324,12 +276,11 @@ static void trace_bts_close(struct trace_iterator *iter) void trace_hw_branch_oops(void) { - spin_lock(&bts_tracer_lock); - - if (trace_hw_branches_enabled) + if (this_tracer) { + ds_suspend_bts_noirq(this_tracer); trace_bts_cpu(hw_branch_trace); - - spin_unlock(&bts_tracer_lock); + ds_resume_bts_noirq(this_tracer); + } } struct tracer bts_tracer __read_mostly = -- cgit v0.10.2 From 4d657e51dfc042216febd4a007c6f36881f9256d Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:41 +0200 Subject: x86, hw-branch-tracer: allocate selftest iterator on heap Allocate the trace_iterator for the hw-branch-tracer selftest on the heap. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144556.578777000@intel.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 499d01c..00dd648 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -757,7 +757,7 @@ int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr) { - struct trace_iterator iter; + struct trace_iterator *iter; struct tracer tracer; unsigned long count; int ret; @@ -777,17 +777,21 @@ trace_selftest_startup_hw_branches(struct tracer *trace, * The hw-branch tracer needs to collect the trace from the various * cpu trace buffers - before tracing is stopped. */ - memset(&iter, 0, sizeof(iter)); + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + memcpy(&tracer, trace, sizeof(tracer)); - iter.trace = &tracer; - iter.tr = tr; - iter.pos = -1; - mutex_init(&iter.mutex); + iter->trace = &tracer; + iter->tr = tr; + iter->pos = -1; + mutex_init(&iter->mutex); - trace->open(&iter); + trace->open(iter); - mutex_destroy(&iter.mutex); + mutex_destroy(&iter->mutex); + kfree(iter); tracing_stop(); -- cgit v0.10.2 From 353afeea24cc51aafc0ff21a72ec740b6f0af50c Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:42 +0200 Subject: x86, ds: fix compiler warning Size_t is defined differently on i386 and x86_64. Change type to avoid compiler warning. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144557.523964000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index e5a263c..e1ba510 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -87,7 +87,7 @@ static int ds_selftest_bts_read(struct bts_tracer *tracer, /* Now to the test itself. */ for (at = from; (void *)at < to; at += trace->ds.size) { struct bts_struct bts; - size_t index; + unsigned long index; int error; if (((void *)at - trace->ds.begin) % trace->ds.size) { -- cgit v0.10.2 From 84f201139245c30777ff858e71b8d7e134b8c3ed Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:43 +0200 Subject: x86, ds: fix bounds check in ds selftest Fix a bad bounds check in the debug store selftest. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144558.450027000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index e1ba510..cccc19a 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -47,8 +47,13 @@ static int ds_selftest_bts_consistency(const struct bts_trace *trace) printk(KERN_CONT "bad bts buffer setup..."); error = -1; } + /* + * We allow top in [begin; end], since its not clear when the + * overflow adjustment happens: after the increment or before the + * write. + */ if ((trace->ds.top < trace->ds.begin) || - (trace->ds.end <= trace->ds.top)) { + (trace->ds.end < trace->ds.top)) { printk(KERN_CONT "bts top out of bounds..."); error = -1; } -- cgit v0.10.2 From 01f6569ece6915616f6cae1d7d8b46ab8da9c1bd Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:44 +0200 Subject: x86, ds: selftest each cpu Perform debug store selftests on each cpu. Cover both the normal and the _noirq variant of the debug store interface. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144559.394583000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index cccc19a..599a963 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -11,13 +11,21 @@ #include #include #include +#include #include -#define BUFFER_SIZE 1021 /* Intentionally chose an odd size. */ +#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ +struct ds_selftest_bts_conf { + struct bts_tracer *tracer; + int error; + int (*suspend)(struct bts_tracer *); + int (*resume)(struct bts_tracer *); +}; + static int ds_selftest_bts_consistency(const struct bts_trace *trace) { int error = 0; @@ -125,36 +133,32 @@ static int ds_selftest_bts_read(struct bts_tracer *tracer, return 0; } -int ds_selftest_bts(void) +static void ds_selftest_bts_cpu(void *arg) { + struct ds_selftest_bts_conf *conf = arg; const struct bts_trace *trace; - struct bts_tracer *tracer; - int error = 0; void *top; - unsigned char buffer[BUFFER_SIZE]; - printk(KERN_INFO "[ds] bts selftest..."); - - tracer = ds_request_bts_cpu(smp_processor_id(), buffer, BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); - if (IS_ERR(tracer)) { - error = PTR_ERR(tracer); - tracer = NULL; + if (IS_ERR(conf->tracer)) { + conf->error = PTR_ERR(conf->tracer); + conf->tracer = NULL; printk(KERN_CONT - "initialization failed (err: %d)...", error); - goto out; + "initialization failed (err: %d)...", conf->error); + return; } - /* The return should already give us enough trace. */ - ds_suspend_bts(tracer); + /* We should meanwhile have enough trace. */ + conf->error = conf->suspend(conf->tracer); + if (conf->error < 0) + return; /* Let's see if we can access the trace. */ - trace = ds_read_bts(tracer); + trace = ds_read_bts(conf->tracer); - error = ds_selftest_bts_consistency(trace); - if (error < 0) - goto out; + conf->error = ds_selftest_bts_consistency(trace); + if (conf->error < 0) + return; /* If everything went well, we should have a few trace entries. */ if (trace->ds.top == trace->ds.begin) { @@ -168,10 +172,11 @@ int ds_selftest_bts(void) } /* Let's try to read the trace we collected. */ - error = ds_selftest_bts_read(tracer, trace, + conf->error = + ds_selftest_bts_read(conf->tracer, trace, trace->ds.begin, trace->ds.top); - if (error < 0) - goto out; + if (conf->error < 0) + return; /* * Let's read the trace again. @@ -179,26 +184,31 @@ int ds_selftest_bts(void) */ top = trace->ds.top; - trace = ds_read_bts(tracer); - error = ds_selftest_bts_consistency(trace); - if (error < 0) - goto out; + trace = ds_read_bts(conf->tracer); + conf->error = ds_selftest_bts_consistency(trace); + if (conf->error < 0) + return; if (top != trace->ds.top) { printk(KERN_CONT "suspend not working..."); - error = -1; - goto out; + conf->error = -1; + return; } /* Let's collect some more trace - see if resume is working. */ - ds_resume_bts(tracer); - ds_suspend_bts(tracer); + conf->error = conf->resume(conf->tracer); + if (conf->error < 0) + return; + + conf->error = conf->suspend(conf->tracer); + if (conf->error < 0) + return; - trace = ds_read_bts(tracer); + trace = ds_read_bts(conf->tracer); - error = ds_selftest_bts_consistency(trace); - if (error < 0) - goto out; + conf->error = ds_selftest_bts_consistency(trace); + if (conf->error < 0) + return; if (trace->ds.top == top) { /* @@ -210,35 +220,113 @@ int ds_selftest_bts(void) printk(KERN_CONT "no resume progress/overflow..."); - error = ds_selftest_bts_read(tracer, trace, + conf->error = + ds_selftest_bts_read(conf->tracer, trace, trace->ds.begin, trace->ds.end); } else if (trace->ds.top < top) { /* * We had a buffer overflow - the entire buffer should * contain trace records. */ - error = ds_selftest_bts_read(tracer, trace, + conf->error = + ds_selftest_bts_read(conf->tracer, trace, trace->ds.begin, trace->ds.end); } else { /* * It is quite likely that the buffer did not overflow. * Let's just check the delta trace. */ - error = ds_selftest_bts_read(tracer, trace, - top, trace->ds.top); + conf->error = + ds_selftest_bts_read(conf->tracer, trace, top, + trace->ds.top); } - if (error < 0) - goto out; + if (conf->error < 0) + return; - error = 0; + conf->error = 0; +} - /* The final test: release the tracer while tracing is suspended. */ - out: - ds_release_bts(tracer); +static int ds_suspend_bts_wrap(struct bts_tracer *tracer) +{ + ds_suspend_bts(tracer); + return 0; +} + +static int ds_resume_bts_wrap(struct bts_tracer *tracer) +{ + ds_resume_bts(tracer); + return 0; +} - printk(KERN_CONT "%s.\n", (error ? "failed" : "passed")); +static void ds_release_bts_noirq_wrap(void *tracer) +{ + (void)ds_release_bts_noirq(tracer); +} - return error; +static int ds_selftest_bts_bad_release_noirq(int cpu, + struct bts_tracer *tracer) +{ + int error = -EPERM; + + /* Try to release the tracer on the wrong cpu. */ + get_cpu(); + if (cpu != smp_processor_id()) { + error = ds_release_bts_noirq(tracer); + if (error != -EPERM) + printk(KERN_CONT "release on wrong cpu..."); + } + put_cpu(); + + return error ? 0 : -1; +} + +int ds_selftest_bts(void) +{ + struct ds_selftest_bts_conf conf; + unsigned char buffer[BUFFER_SIZE]; + int cpu; + + printk(KERN_INFO "[ds] bts selftest..."); + conf.error = 0; + + get_online_cpus(); + for_each_online_cpu(cpu) { + conf.suspend = ds_suspend_bts_wrap; + conf.resume = ds_resume_bts_wrap; + conf.tracer = + ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + ds_selftest_bts_cpu(&conf); + ds_release_bts(conf.tracer); + if (conf.error < 0) + goto out; + + conf.suspend = ds_suspend_bts_noirq; + conf.resume = ds_resume_bts_noirq; + conf.tracer = + ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); + if (conf.error >= 0) { + conf.error = + ds_selftest_bts_bad_release_noirq(cpu, + conf.tracer); + /* We must not release the tracer twice. */ + if (conf.error < 0) + conf.tracer = NULL; + } + smp_call_function_single(cpu, ds_release_bts_noirq_wrap, + conf.tracer, 1); + if (conf.error < 0) + goto out; + } + + conf.error = 0; + out: + put_online_cpus(); + printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); + + return conf.error; } int ds_selftest_pebs(void) -- cgit v0.10.2 From 3a68eef945b234f286406d96dc690fe17863c203 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:45 +0200 Subject: x86, ds: add task tracing selftest Add selftests to cover per-task branch tracing. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144600.329346000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index 599a963..a40b253 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -280,10 +280,51 @@ static int ds_selftest_bts_bad_release_noirq(int cpu, return error ? 0 : -1; } +static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) +{ + struct bts_tracer *tracer; + int error; + + /* Try to request cpu tracing while task tracing is active. */ + tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, + (size_t)-1, BTS_KERNEL); + error = PTR_ERR(tracer); + if (!IS_ERR(tracer)) { + ds_release_bts(tracer); + error = 0; + } + + if (error != -EPERM) + printk(KERN_CONT "cpu/task tracing overlap..."); + + return error ? 0 : -1; +} + +static int ds_selftest_bts_bad_request_task(void *buffer) +{ + struct bts_tracer *tracer; + int error; + + /* Try to request cpu tracing while task tracing is active. */ + tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, + (size_t)-1, BTS_KERNEL); + error = PTR_ERR(tracer); + if (!IS_ERR(tracer)) { + error = 0; + ds_release_bts(tracer); + } + + if (error != -EPERM) + printk(KERN_CONT "task/cpu tracing overlap..."); + + return error ? 0 : -1; +} + int ds_selftest_bts(void) { struct ds_selftest_bts_conf conf; unsigned char buffer[BUFFER_SIZE]; + unsigned long irq; int cpu; printk(KERN_INFO "[ds] bts selftest..."); @@ -297,6 +338,8 @@ int ds_selftest_bts(void) ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, (size_t)-1, BTS_KERNEL); ds_selftest_bts_cpu(&conf); + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_task(buffer); ds_release_bts(conf.tracer); if (conf.error < 0) goto out; @@ -315,12 +358,40 @@ int ds_selftest_bts(void) if (conf.error < 0) conf.tracer = NULL; } + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_task(buffer); smp_call_function_single(cpu, ds_release_bts_noirq_wrap, conf.tracer, 1); if (conf.error < 0) goto out; } + conf.suspend = ds_suspend_bts_wrap; + conf.resume = ds_resume_bts_wrap; + conf.tracer = + ds_request_bts_task(current, buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + ds_selftest_bts_cpu(&conf); + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); + ds_release_bts(conf.tracer); + if (conf.error < 0) + goto out; + + conf.suspend = ds_suspend_bts_noirq; + conf.resume = ds_resume_bts_noirq; + conf.tracer = + ds_request_bts_task(current, buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); + local_irq_save(irq); + ds_selftest_bts_cpu(&conf); + if (conf.error >= 0) + conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); + ds_release_bts_noirq(conf.tracer); + local_irq_restore(irq); + if (conf.error < 0) + goto out; + conf.error = 0; out: put_online_cpus(); -- cgit v0.10.2 From 2311f0de21c17b2a8b960677a9cccfbfa52beb35 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:46 +0200 Subject: x86, ds: add leakage warning Add a warning in case a debug store context is not removed before the task it is attached to is freed. Remove the old warning at thread exit. It is too early. Declare the debug store context field in thread_struct unconditionally. Remove ds_copy_thread() and ds_exit_thread() and do the work directly in process*.c. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144601.254472000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 413e127..149e520 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -285,21 +285,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); */ extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); -/* - * Task clone/init and cleanup work - */ -extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); -extern void ds_exit_thread(struct task_struct *tsk); - #else /* CONFIG_X86_DS */ struct cpuinfo_x86; static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} static inline void ds_switch_to(struct task_struct *prev, struct task_struct *next) {} -static inline void ds_copy_thread(struct task_struct *tsk, - struct task_struct *father) {} -static inline void ds_exit_thread(struct task_struct *tsk) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1efeb49..7c39de7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -454,10 +454,8 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; -#ifdef CONFIG_X86_DS -/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + /* Debug Store context; see asm/ds.h */ struct ds_context *ds_ctx; -#endif /* CONFIG_X86_DS */ }; static inline unsigned long native_get_debugreg(int regno) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 21a3852..71cab3b 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -1352,16 +1352,6 @@ void ds_switch_to(struct task_struct *prev, struct task_struct *next) update_debugctlmsr(debugctlmsr); } -void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) -{ - clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); - tsk->thread.ds_ctx = NULL; -} - -void ds_exit_thread(struct task_struct *tsk) -{ -} - static __init int ds_selftest(void) { if (ds_cfg.sizeof_rec[ds_bts]) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ca98915..fb5dfb8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -45,6 +46,8 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } + + WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } void free_thread_info(struct thread_info *ti) @@ -83,8 +86,6 @@ void exit_thread(void) put_cpu(); kfree(bp); } - - ds_exit_thread(current); } void flush_thread(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 76f8f84..b5e4bfe 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -290,7 +290,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_max = 0; } - ds_copy_thread(p, current); + clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); + p->thread.ds_ctx = NULL; clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); p->thread.debugctlmsr = 0; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b751a41..5a1a1de 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -335,7 +335,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, goto out; } - ds_copy_thread(p, me); + clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); + p->thread.ds_ctx = NULL; clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); p->thread.debugctlmsr = 0; -- cgit v0.10.2 From ee811517a5604aa63fae803b7c044712699e1303 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:47 +0200 Subject: x86, ds: use single debug store cpu configuration Use a single configuration for all cpus. Reported-by: Ingo Molnar Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144602.191165000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 71cab3b..443f415 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -47,9 +47,8 @@ struct ds_configuration { /* Control bit-masks indexed by enum ds_feature: */ unsigned long ctl[dsf_ctl_max]; }; -static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); +static struct ds_configuration ds_cfg __read_mostly; -#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) /* Maximal size of a DS configuration: */ #define MAX_SIZEOF_DS (12 * 8) @@ -1268,6 +1267,10 @@ ds_configure(const struct ds_configuration *cfg, void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) { + /* Only configure the first cpu. Others are identical. */ + if (ds_cfg.name) + return; + switch (c->x86) { case 0x6: switch (c->x86_model) { -- cgit v0.10.2 From 0f4814065ff8c24ca8bfd75c9b73502be152c287 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:48 +0200 Subject: x86, ptrace: add bts context unconditionally Add the ptrace bts context field to task_struct unconditionally. Initialize the field directly in copy_process(). Remove all the unneeded functionality used to initialize that field. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144603.292754000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index e304b66..5cdd19f 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -extern void x86_ptrace_untrace(struct task_struct *); -extern void x86_ptrace_fork(struct task_struct *child, - unsigned long clone_flags); +#ifdef CONFIG_X86_PTRACE_BTS +extern void ptrace_bts_untrace(struct task_struct *tsk); -#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) -#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) +#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) +#endif /* CONFIG_X86_PTRACE_BTS */ #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index adbb243..b32a8ee 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -887,37 +887,19 @@ static int ptrace_bts_size(struct task_struct *child) return (trace->ds.top - trace->ds.begin) / trace->ds.size; } -static inline void ptrace_bts_fork(struct task_struct *tsk) -{ - tsk->bts = NULL; -} - /* * Called from __ptrace_unlink() after the child has been moved back * to its original parent. */ -static inline void ptrace_bts_untrace(struct task_struct *child) +void ptrace_bts_untrace(struct task_struct *child) { if (unlikely(child->bts)) { free_bts_context(child->bts); child->bts = NULL; } } -#else -static inline void ptrace_bts_fork(struct task_struct *tsk) {} -static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ -void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - ptrace_bts_fork(child); -} - -void x86_ptrace_untrace(struct task_struct *child) -{ - ptrace_bts_untrace(child); -} - /* * Called by kernel/ptrace.c when detaching.. * diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67c1565..59e133d 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer); -extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_untrace(task) do { } while (0) #endif -#ifndef arch_ptrace_fork -/* - * Do machine-specific work to initialize a new task. - * - * This is called from copy_process(). - */ -#define arch_ptrace_fork(child, clone_flags) do { } while (0) -#endif - extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/include/linux/sched.h b/include/linux/sched.h index 52b8cd0..451186a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1205,13 +1205,11 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; -#ifdef CONFIG_X86_PTRACE_BTS /* * This is the tracer handle for the ptrace BTS extension. * This field actually belongs to the ptracer task. */ struct bts_context *bts; -#endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; diff --git a/kernel/fork.c b/kernel/fork.c index 660c2b8..69bde7a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1086,8 +1086,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif - if (unlikely(current->ptrace)) - ptrace_fork(p, clone_flags); + + p->bts = NULL; /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index aaad0ec..321127d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -27,16 +27,6 @@ /* - * Initialize a new task whose father had been ptraced. - * - * Called from copy_process(). - */ -void ptrace_fork(struct task_struct *child, unsigned long clone_flags) -{ - arch_ptrace_fork(child, clone_flags); -} - -/* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. * -- cgit v0.10.2 From 6047550d3d26fed88b18a208b31f8b90b5ef3e9b Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:49 +0200 Subject: x86, ds: dont use TIF_DEBUGCTLMSR Debug store already uses TIF_DS_AREA_MSR to trigger debug store context switch handling. No need to use TIF_DEBUGCTLMSR, as well. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144604.256645000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 443f415..cab2832 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -925,11 +925,6 @@ static void update_task_debugctlmsr(struct task_struct *task, get_cpu(); if (task == current) update_debugctlmsr(debugctlmsr); - - if (task->thread.debugctlmsr) - set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); - else - clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); put_cpu(); } -- cgit v0.10.2 From 608780a9048efa3e85fbc4d8649b26805cc588aa Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:50 +0200 Subject: x86, ds: fix bad ds_reset_pebs() Ds_reset_pebs() passed the wrong qualifier to a shared function resulting in a reset of bts, rather than pebs. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144605.206510000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index cab2832..ebfb0fd 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -1186,7 +1186,7 @@ int ds_reset_pebs(struct pebs_tracer *tracer) tracer->trace.ds.top = tracer->trace.ds.begin; - ds_set(tracer->ds.context->ds, ds_bts, ds_index, + ds_set(tracer->ds.context->ds, ds_pebs, ds_index, (unsigned long)tracer->trace.ds.top); return 0; -- cgit v0.10.2 From 150f5164c1258e05b7dea16f29e592f354c48f34 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:51 +0200 Subject: x86, ds: allow small debug store buffers Check the buffer size more precisely to allow buffers for exactly one element provided the base address is already properly aligned. Add a debug store selftest. Reported-by: Stephane Eranian Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144606.139137000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ebfb0fd..4e05157 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -656,6 +656,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, { struct ds_context *context; int error; + size_t req_size; error = -EOPNOTSUPP; if (!ds_cfg.sizeof_rec[qual]) @@ -665,9 +666,13 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, if (!base) goto out; - /* We need space for alignment adjustments in ds_init_ds_trace(). */ + req_size = ds_cfg.sizeof_rec[qual]; + /* We might need space for alignment adjustments. */ + if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) + req_size += DS_ALIGNMENT; + error = -EINVAL; - if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) + if (size < req_size) goto out; if (th != (size_t)-1) { diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index a40b253..5f104a0 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -16,8 +16,8 @@ #include -#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ - +#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ +#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ struct ds_selftest_bts_conf { struct bts_tracer *tracer; @@ -381,7 +381,7 @@ int ds_selftest_bts(void) conf.suspend = ds_suspend_bts_noirq; conf.resume = ds_resume_bts_noirq; conf.tracer = - ds_request_bts_task(current, buffer, BUFFER_SIZE, + ds_request_bts_task(current, buffer, SMALL_BUFFER_SIZE, NULL, (size_t)-1, BTS_KERNEL); local_irq_save(irq); ds_selftest_bts_cpu(&conf); -- cgit v0.10.2 From 017bc617657c928cb9a0c45a7a7e9f4e66695347 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:52 +0200 Subject: x86, ds: support Core i7 Add debug store support for Core i7. Core i7 adds a reset value for each performance counter and a new PEBS record format. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144607.088997000@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 149e520..70dac19 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -234,8 +234,12 @@ struct bts_trace { struct pebs_trace { struct ds_trace ds; - /* the PEBS reset value */ - unsigned long long reset_value; + /* the number of valid counters in the below array */ + unsigned int counters; + +#define MAX_PEBS_COUNTERS 4 + /* the counter reset value */ + unsigned long long counter_reset[MAX_PEBS_COUNTERS]; }; @@ -270,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); * Returns 0 on success; -Eerrno on error * * tracer: the tracer handle returned from ds_request_pebs() + * counter: the index of the counter * value: the new counter reset value */ -extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); +extern int ds_set_pebs_reset(struct pebs_tracer *tracer, + unsigned int counter, u64 value); /* * Initialization diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 4e05157..48bfe13 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -44,6 +44,9 @@ struct ds_configuration { /* The size of a BTS/PEBS record in bytes: */ unsigned char sizeof_rec[2]; + /* The number of pebs counter reset values in the DS structure. */ + unsigned char nr_counter_reset; + /* Control bit-masks indexed by enum ds_feature: */ unsigned long ctl[dsf_ctl_max]; }; @@ -51,7 +54,7 @@ static struct ds_configuration ds_cfg __read_mostly; /* Maximal size of a DS configuration: */ -#define MAX_SIZEOF_DS (12 * 8) +#define MAX_SIZEOF_DS 0x80 /* Maximal size of a BTS record: */ #define MAX_SIZEOF_BTS (3 * 8) @@ -59,6 +62,12 @@ static struct ds_configuration ds_cfg __read_mostly; /* BTS and PEBS buffer alignment: */ #define DS_ALIGNMENT (1 << 3) +/* Number of buffer pointers in DS: */ +#define NUM_DS_PTR_FIELDS 8 + +/* Size of a pebs reset value in DS: */ +#define PEBS_RESET_FIELD_SIZE 8 + /* Mask of control bits in the DS MSR register: */ #define BTS_CONTROL \ ( ds_cfg.ctl[dsf_bts] | \ @@ -1164,9 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) return NULL; ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); - tracer->trace.reset_value = - *(u64 *)(tracer->ds.context->ds + - (ds_cfg.sizeof_ptr_field * 8)); + + tracer->trace.counters = ds_cfg.nr_counter_reset; + memcpy(tracer->trace.counter_reset, + tracer->ds.context->ds + + (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), + ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); return &tracer->trace; } @@ -1197,13 +1209,18 @@ int ds_reset_pebs(struct pebs_tracer *tracer) return 0; } -int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) +int ds_set_pebs_reset(struct pebs_tracer *tracer, + unsigned int counter, u64 value) { if (!tracer) return -EINVAL; + if (ds_cfg.nr_counter_reset < counter) + return -EINVAL; + *(u64 *)(tracer->ds.context->ds + - (ds_cfg.sizeof_ptr_field * 8)) = value; + (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + + (counter * PEBS_RESET_FIELD_SIZE)) = value; return 0; } @@ -1213,16 +1230,26 @@ static const struct ds_configuration ds_cfg_netburst = { .ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_user] = (1 << 6), + .nr_counter_reset = 1, }; static const struct ds_configuration ds_cfg_pentium_m = { .name = "Pentium M", .ctl[dsf_bts] = (1 << 6) | (1 << 7), + .nr_counter_reset = 1, }; static const struct ds_configuration ds_cfg_core2_atom = { .name = "Core 2/Atom", .ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_user] = (1 << 10), + .nr_counter_reset = 1, +}; +static const struct ds_configuration ds_cfg_core_i7 = { + .name = "Core i7", + .ctl[dsf_bts] = (1 << 6) | (1 << 7), + .ctl[dsf_bts_kernel] = (1 << 9), + .ctl[dsf_bts_user] = (1 << 10), + .nr_counter_reset = 4, }; static void @@ -1239,6 +1266,32 @@ ds_configure(const struct ds_configuration *cfg, nr_pebs_fields = 18; #endif + /* + * Starting with version 2, architectural performance + * monitoring supports a format specifier. + */ + if ((cpuid_eax(0xa) & 0xff) > 1) { + unsigned long perf_capabilities, format; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); + + format = (perf_capabilities >> 8) & 0xf; + + switch (format) { + case 0: + nr_pebs_fields = 18; + break; + case 1: + nr_pebs_fields = 22; + break; + default: + printk(KERN_INFO + "[ds] unknown PEBS format: %lu\n", format); + nr_pebs_fields = 0; + break; + } + } + memset(&ds_cfg, 0, sizeof(ds_cfg)); ds_cfg = *cfg; @@ -1262,7 +1315,7 @@ ds_configure(const struct ds_configuration *cfg, printk("bts/pebs record: %u/%u bytes\n", ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); - WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_ptr_field)); + WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) @@ -1284,6 +1337,8 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) ds_configure(&ds_cfg_core2_atom, c); break; case 0x1a: /* Core i7 */ + ds_configure(&ds_cfg_core_i7, c); + break; default: /* Sorry, don't know about them. */ break; -- cgit v0.10.2 From a5dec5573f3c7e63f2f9b5852b9759ea342a5ff9 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 27 Mar 2009 14:55:44 +0800 Subject: tracing: use macros to denote usec and nsec per second Impact: cleanup Use USEC_PER_SEC and NSEC_PER_SEC instead of 1000000 and 1000000000. Signed-off-by: Li Zefan LKML-Reference: <49CC7870.9000309@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 7a30fc4..a29ef23 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "trace.h" #include "trace_output.h" @@ -67,7 +68,7 @@ initcall_call_print_line(struct trace_iterator *iter) trace_assign_type(field, entry); call = &field->boot_call; ts = iter->ts; - nsec_rem = do_div(ts, 1000000000); + nsec_rem = do_div(ts, NSEC_PER_SEC); ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", (unsigned long)ts, nsec_rem, call->func, call->caller); @@ -92,7 +93,7 @@ initcall_ret_print_line(struct trace_iterator *iter) trace_assign_type(field, entry); init_ret = &field->boot_ret; ts = iter->ts; - nsec_rem = do_div(ts, 1000000000); + nsec_rem = do_div(ts, NSEC_PER_SEC); ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " "returned %d after %llu msecs\n", diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 8e37fcd..d53b45e 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -9,6 +9,8 @@ #include #include #include +#include + #include #include "trace.h" @@ -174,7 +176,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) struct mmiotrace_rw *rw; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; int ret = 1; @@ -221,7 +223,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter) struct mmiotrace_map *m; struct trace_seq *s = &iter->seq; unsigned long long t = ns2usecs(iter->ts); - unsigned long usec_rem = do_div(t, 1000000ULL); + unsigned long usec_rem = do_div(t, USEC_PER_SEC); unsigned secs = (unsigned long)t; int ret; -- cgit v0.10.2 From 5452af664f6fba26b80eb2c8c4ceae2999d5cf56 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Mar 2009 00:25:38 +0100 Subject: tracing/ftrace: factorize the tracing files creation Impact: cleanup Most of the tracing files creation follow the same pattern: ret = debugfs_create_file(...) if (!ret) pr_warning("Couldn't create ... entry\n") Unify it! Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker LKML-Reference: <1238109938-11840-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 678e3d6..6ea5a1a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2698,38 +2698,23 @@ static const struct file_operations ftrace_graph_fops = { static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("available_filter_functions", 0444, - d_tracer, NULL, &ftrace_avail_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'available_filter_functions' entry\n"); + trace_create_file("available_filter_functions", 0444, + d_tracer, NULL, &ftrace_avail_fops); - entry = debugfs_create_file("failures", 0444, - d_tracer, NULL, &ftrace_failures_fops); - if (!entry) - pr_warning("Could not create debugfs 'failures' entry\n"); + trace_create_file("failures", 0444, + d_tracer, NULL, &ftrace_failures_fops); - entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer, - NULL, &ftrace_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_filter' entry\n"); + trace_create_file("set_ftrace_filter", 0644, d_tracer, + NULL, &ftrace_filter_fops); - entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer, + trace_create_file("set_ftrace_notrace", 0644, d_tracer, NULL, &ftrace_notrace_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_notrace' entry\n"); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - entry = debugfs_create_file("set_graph_function", 0444, d_tracer, + trace_create_file("set_graph_function", 0444, d_tracer, NULL, &ftrace_graph_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_graph_function' entry\n"); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ return 0; @@ -2987,7 +2972,6 @@ static const struct file_operations ftrace_pid_fops = { static __init int ftrace_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -2995,11 +2979,8 @@ static __init int ftrace_init_debugfs(void) ftrace_init_dyn_debugfs(d_tracer); - entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer, - NULL, &ftrace_pid_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'set_ftrace_pid' entry\n"); + trace_create_file("set_ftrace_pid", 0644, d_tracer, + NULL, &ftrace_pid_fops); ftrace_profile_debugfs(d_tracer); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 960cbf4..74a1180 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2845,14 +2845,11 @@ static const struct file_operations rb_simple_fops = { static __init int rb_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_on", 0644, d_tracer, - &ring_buffer_flags, &rb_simple_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_on' entry\n"); + trace_create_file("tracing_on", 0644, d_tracer, + &ring_buffer_flags, &rb_simple_fops); return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 32653c8..0615751 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3581,7 +3581,7 @@ struct dentry *tracing_dentry_percpu(void) static void tracing_init_debugfs_percpu(long cpu) { struct dentry *d_percpu = tracing_dentry_percpu(); - struct dentry *entry, *d_cpu; + struct dentry *d_cpu; /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ char cpu_dir[7]; @@ -3596,21 +3596,15 @@ static void tracing_init_debugfs_percpu(long cpu) } /* per cpu trace_pipe */ - entry = debugfs_create_file("trace_pipe", 0444, d_cpu, - (void *) cpu, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe' entry\n"); + trace_create_file("trace_pipe", 0444, d_cpu, + (void *) cpu, &tracing_pipe_fops); /* per cpu trace */ - entry = debugfs_create_file("trace", 0644, d_cpu, - (void *) cpu, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); + trace_create_file("trace", 0644, d_cpu, + (void *) cpu, &tracing_fops); - entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, - (void *) cpu, &tracing_buffers_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n"); + trace_create_file("trace_pipe_raw", 0444, d_cpu, + (void *) cpu, &tracing_buffers_fops); } #ifdef CONFIG_FTRACE_SELFTEST @@ -3766,6 +3760,22 @@ static const struct file_operations trace_options_core_fops = { .write = trace_options_core_write, }; +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops) +{ + struct dentry *ret; + + ret = debugfs_create_file(name, mode, parent, data, fops); + if (!ret) + pr_warning("Could not create debugfs '%s' entry\n", name); + + return ret; +} + + static struct dentry *trace_options_init_dentry(void) { struct dentry *d_tracer; @@ -3793,7 +3803,6 @@ create_trace_option_file(struct trace_option_dentry *topt, struct tracer_opt *opt) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) @@ -3802,11 +3811,9 @@ create_trace_option_file(struct trace_option_dentry *topt, topt->flags = flags; topt->opt = opt; - entry = debugfs_create_file(opt->name, 0644, t_options, topt, + topt->entry = trace_create_file(opt->name, 0644, t_options, topt, &trace_options_fops); - topt->entry = entry; - } static struct trace_option_dentry * @@ -3861,123 +3868,81 @@ static struct dentry * create_trace_option_core_file(const char *option, long index) { struct dentry *t_options; - struct dentry *entry; t_options = trace_options_init_dentry(); if (!t_options) return NULL; - entry = debugfs_create_file(option, 0644, t_options, (void *)index, + return trace_create_file(option, 0644, t_options, (void *)index, &trace_options_core_fops); - - return entry; } static __init void create_trace_options_dir(void) { struct dentry *t_options; - struct dentry *entry; int i; t_options = trace_options_init_dentry(); if (!t_options) return; - for (i = 0; trace_options[i]; i++) { - entry = create_trace_option_core_file(trace_options[i], i); - if (!entry) - pr_warning("Could not create debugfs %s entry\n", - trace_options[i]); - } + for (i = 0; trace_options[i]; i++) + create_trace_option_core_file(trace_options[i], i); } static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; - struct dentry *entry; int cpu; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("tracing_enabled", 0644, d_tracer, - &global_trace, &tracing_ctrl_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_enabled' entry\n"); + trace_create_file("tracing_enabled", 0644, d_tracer, + &global_trace, &tracing_ctrl_fops); - entry = debugfs_create_file("trace_options", 0644, d_tracer, - NULL, &tracing_iter_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace_options' entry\n"); + trace_create_file("trace_options", 0644, d_tracer, + NULL, &tracing_iter_fops); - create_trace_options_dir(); + trace_create_file("tracing_cpumask", 0644, d_tracer, + NULL, &tracing_cpumask_fops); + + trace_create_file("trace", 0644, d_tracer, + (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); + + trace_create_file("available_tracers", 0444, d_tracer, + &global_trace, &show_traces_fops); + + trace_create_file("current_tracer", 0444, d_tracer, + &global_trace, &set_tracer_fops); + + trace_create_file("tracing_max_latency", 0644, d_tracer, + &tracing_max_latency, &tracing_max_lat_fops); + + trace_create_file("tracing_thresh", 0644, d_tracer, + &tracing_thresh, &tracing_max_lat_fops); - entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, - NULL, &tracing_cpumask_fops); - if (!entry) - pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); - - entry = debugfs_create_file("trace", 0644, d_tracer, - (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); - if (!entry) - pr_warning("Could not create debugfs 'trace' entry\n"); - - entry = debugfs_create_file("available_tracers", 0444, d_tracer, - &global_trace, &show_traces_fops); - if (!entry) - pr_warning("Could not create debugfs 'available_tracers' entry\n"); - - entry = debugfs_create_file("current_tracer", 0444, d_tracer, - &global_trace, &set_tracer_fops); - if (!entry) - pr_warning("Could not create debugfs 'current_tracer' entry\n"); - - entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer, - &tracing_max_latency, - &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_max_latency' entry\n"); - - entry = debugfs_create_file("tracing_thresh", 0644, d_tracer, - &tracing_thresh, &tracing_max_lat_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'tracing_thresh' entry\n"); - entry = debugfs_create_file("README", 0644, d_tracer, - NULL, &tracing_readme_fops); - if (!entry) - pr_warning("Could not create debugfs 'README' entry\n"); - - entry = debugfs_create_file("trace_pipe", 0444, d_tracer, + trace_create_file("README", 0644, d_tracer, + NULL, &tracing_readme_fops); + + trace_create_file("trace_pipe", 0444, d_tracer, (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_pipe' entry\n"); - - entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer, - &global_trace, &tracing_entries_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'buffer_size_kb' entry\n"); - - entry = debugfs_create_file("trace_marker", 0220, d_tracer, - NULL, &tracing_mark_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'trace_marker' entry\n"); + + trace_create_file("buffer_size_kb", 0644, d_tracer, + &global_trace, &tracing_entries_fops); + + trace_create_file("trace_marker", 0220, d_tracer, + NULL, &tracing_mark_fops); #ifdef CONFIG_DYNAMIC_FTRACE - entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer, - &ftrace_update_tot_cnt, - &tracing_dyn_info_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'dyn_ftrace_total_info' entry\n"); + trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, + &ftrace_update_tot_cnt, &tracing_dyn_info_fops); #endif #ifdef CONFIG_SYSPROF_TRACER init_tracer_sysprof_debugfs(d_tracer); #endif + create_trace_options_dir(); + for_each_tracing_cpu(cpu) tracing_init_debugfs_percpu(cpu); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 47aa6d0..f76a8f8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -470,6 +470,12 @@ void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); void tracing_reset_online_cpus(struct trace_array *tr); int tracing_open_generic(struct inode *inode, struct file *filp); +struct dentry *trace_create_file(const char *name, + mode_t mode, + struct dentry *parent, + void *data, + const struct file_operations *fops); + struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 22cba99..199de9c 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -28,4 +28,3 @@ void ftrace_profile_disable(int event_id) return event->profile_disable(event); } } - diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index eb81556..9bece96 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = { static __init int init_trace_printk_function_export(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); if (!d_tracer) return 0; - entry = debugfs_create_file("printk_formats", 0444, d_tracer, + trace_create_file("printk_formats", 0444, d_tracer, NULL, &ftrace_formats_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'printk_formats' entry\n"); return 0; } diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c750f65..1796f00 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace); static __init int stack_trace_init(void) { struct dentry *d_tracer; - struct dentry *entry; d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("stack_max_size", 0644, d_tracer, - &max_stack_size, &stack_max_size_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_max_size' entry\n"); + trace_create_file("stack_max_size", 0644, d_tracer, + &max_stack_size, &stack_max_size_fops); - entry = debugfs_create_file("stack_trace", 0444, d_tracer, - NULL, &stack_trace_fops); - if (!entry) - pr_warning("Could not create debugfs 'stack_trace' entry\n"); + trace_create_file("stack_trace", 0444, d_tracer, + NULL, &stack_trace_fops); if (stack_tracer_enabled) register_ftrace_function(&trace_ops); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 91fd19c..e04b76c 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = { void init_tracer_sysprof_debugfs(struct dentry *d_tracer) { - struct dentry *entry; - entry = debugfs_create_file("sysprof_sample_period", 0644, + trace_create_file("sysprof_sample_period", 0644, d_tracer, NULL, &sysprof_sample_fops); - if (entry) - return; - pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n"); } -- cgit v0.10.2 From 597af81537654097b67fd7a0c92775e66d4a86fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 3 Apr 2009 15:24:12 -0400 Subject: function-graph: use int instead of atomic for ftrace_graph_active Impact: cleanup The variable ftrace_graph_active is only modified under the ftrace_lock mutex, thus an atomic is not necessary for modification. Reported-by: Andrew Morton Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6ea5a1a..8e6a0b5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3092,7 +3092,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static atomic_t ftrace_graph_active; +static int ftrace_graph_active; static struct notifier_block ftrace_suspend_notifier; int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) @@ -3244,7 +3244,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, mutex_lock(&ftrace_lock); /* we currently allow only one tracer registered at a time */ - if (atomic_read(&ftrace_graph_active)) { + if (ftrace_graph_active) { ret = -EBUSY; goto out; } @@ -3252,10 +3252,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; register_pm_notifier(&ftrace_suspend_notifier); - atomic_inc(&ftrace_graph_active); + ftrace_graph_active++; ret = start_graph_tracing(); if (ret) { - atomic_dec(&ftrace_graph_active); + ftrace_graph_active--; goto out; } @@ -3273,10 +3273,10 @@ void unregister_ftrace_graph(void) { mutex_lock(&ftrace_lock); - if (!unlikely(atomic_read(&ftrace_graph_active))) + if (unlikely(!ftrace_graph_active)) goto out; - atomic_dec(&ftrace_graph_active); + ftrace_graph_active--; unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = ftrace_graph_entry_stub; @@ -3290,7 +3290,7 @@ void unregister_ftrace_graph(void) /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { - if (atomic_read(&ftrace_graph_active)) { + if (ftrace_graph_active) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); -- cgit v0.10.2 From dcef788eb9659b61a2110284fcce3ca6e63480d2 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Tue, 31 Mar 2009 15:26:14 +0800 Subject: ftrace: clean up enable logic for sched_switch Unify sched_switch and sched_wakeup's action to following logic: Do record_cmdline when start_cmdline_record() is called. Start tracing events when the tracer is started. Signed-off-by: Zhao Lei LKML-Reference: <49D1C596.5050203@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 9117cea..9d8cccd 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -29,13 +29,13 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, int cpu; int pc; - if (!sched_ref || sched_stopped) + if (unlikely(!sched_ref)) return; tracing_record_cmdline(prev); tracing_record_cmdline(next); - if (!tracer_enabled) + if (!tracer_enabled || sched_stopped) return; pc = preempt_count(); @@ -56,15 +56,15 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) unsigned long flags; int cpu, pc; - if (!likely(tracer_enabled)) + if (unlikely(!sched_ref)) return; - pc = preempt_count(); tracing_record_cmdline(current); - if (sched_stopped) + if (!tracer_enabled || sched_stopped) return; + pc = preempt_count(); local_irq_save(flags); cpu = raw_smp_processor_id(); data = ctx_trace->data[cpu]; -- cgit v0.10.2 From 9c83633ad38138855181af6936e8ac570ef7e2cb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Apr 2009 14:48:16 +0300 Subject: missing unlock in jfs_quota_write() We should unlock &inode->i_mutex on the error path. This bug was in ext2_quota_write(). I sent a patch to them today as well. Found by smatch (http://repo.or.cz/w/smatch.git). Compile tested. regards, dan carpenter Signed-off-by: Dan Carpenter Signed-off-by: Dave Kleikamp diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 6f21adf..d9b0e92 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -720,8 +720,10 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) + if (len == towrite) { + mutex_unlock(&inode->i_mutex); return err; + } if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); inode->i_version++; -- cgit v0.10.2 From 6553e192d48af88184029066c30c9464516ea0b7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 6 Apr 2009 16:59:32 +0100 Subject: ASoC: Display return code when failing to add a DAPM kcontrol Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 735903a..46485de 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -357,8 +357,9 @@ static int dapm_new_mixer(struct snd_soc_codec *codec, path->long_name); ret = snd_ctl_add(codec->card, path->kcontrol); if (ret < 0) { - printk(KERN_ERR "asoc: failed to add dapm kcontrol %s\n", - path->long_name); + printk(KERN_ERR "asoc: failed to add dapm kcontrol %s: %d\n", + path->long_name, + ret); kfree(path->long_name); path->long_name = NULL; return ret; -- cgit v0.10.2 From 06f409d76f1d382167eb1cadde2e23a73272865d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Apr 2009 18:10:13 +0100 Subject: ASoC: Provide core support for symmetric sample rates Many devices require symmetric configurations of capture and playback data formats, often due to shared clocking but sometimes also due to other shared playback and record configuration in the device. Start providing core support for this by allowing the DAIs or the machine to specify that the sample rates used should be kept symmetric. A flag symmetric_rates is provided in the snd_soc_dai and snd_soc_dai_link structures. If this is set in either of the DAIs or in the machine then a constraint will be applied when a stream is already open preventing any changes in sample rate. Signed-off-by: Mark Brown diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 1367647..22b729f 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -208,6 +208,7 @@ struct snd_soc_dai { /* DAI capabilities */ struct snd_soc_pcm_stream capture; struct snd_soc_pcm_stream playback; + unsigned int symmetric_rates:1; /* DAI runtime info */ struct snd_pcm_runtime *runtime; diff --git a/include/sound/soc.h b/include/sound/soc.h index a40bc6f..b1f2f88 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -417,6 +417,12 @@ struct snd_soc_dai_link { /* codec/machine specific init - e.g. add machine controls */ int (*init)(struct snd_soc_codec *codec); + /* Symmetry requirements */ + unsigned int symmetric_rates:1; + + /* Symmetry data - only valid if symmetry is being enforced */ + unsigned int rate; + /* DAI pcm */ struct snd_pcm *pcm; }; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 99712f6..dd28009 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -113,6 +113,35 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec) } #endif +static int soc_pcm_apply_symmetry(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_card *card = socdev->card; + struct snd_soc_dai_link *machine = rtd->dai; + struct snd_soc_dai *cpu_dai = machine->cpu_dai; + struct snd_soc_dai *codec_dai = machine->codec_dai; + int ret; + + if (codec_dai->symmetric_rates || cpu_dai->symmetric_rates || + machine->symmetric_rates) { + dev_dbg(card->dev, "Symmetry forces %dHz rate\n", + machine->rate); + + ret = snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + machine->rate, + machine->rate); + if (ret < 0) { + dev_err(card->dev, + "Unable to apply rate symmetry constraint: %d\n", ret); + return ret; + } + } + + return 0; +} + /* * Called by ALSA when a PCM substream is opened, the runtime->hw record is * then initialized and any private data can be allocated. This also calls @@ -221,6 +250,13 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) goto machine_err; } + /* Symmetry only applies if we've already got an active stream. */ + if (cpu_dai->active || codec_dai->active) { + ret = soc_pcm_apply_symmetry(substream); + if (ret != 0) + goto machine_err; + } + pr_debug("asoc: %s <-> %s info:\n", codec_dai->name, cpu_dai->name); pr_debug("asoc: rate mask 0x%x\n", runtime->hw.rates); pr_debug("asoc: min ch %d max ch %d\n", runtime->hw.channels_min, @@ -521,6 +557,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, } } + machine->rate = params_rate(params); + out: mutex_unlock(&pcm_mutex); return ret; -- cgit v0.10.2 From 5409fb4e327a84972483047ecf4fb41f279453e2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Apr 2009 18:45:21 +0100 Subject: ASoC: Add WM8988 CODEC driver The WM8988 is a low power, high quality stereo CODEC designed for portable digital audio applications. The device integrates complete interfaces to 2 stereo headphone or line out ports. External component requirements are drastically reduced as no separate headphone amplifiers are required. Advanced on-chip digital signal processing performs graphic equaliser, 3-D sound enhancement and automatic level control for the microphone or line input. The WM8988 can operate as a master or a slave, with various master clock frequencies including 12 or 24MHz for USB devices, or standard 256fs rates like 12.288MHz and 24.576MHz. Different audio sample rates such as 96kHz, 48kHz, 44.1kHz are generated directly from the master clock without the need for an external PLL. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index b6c7f7a..ab36485 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -36,6 +36,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_WM8900 if I2C select SND_SOC_WM8903 if I2C select SND_SOC_WM8971 if I2C + select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI select SND_SOC_WM8990 if I2C select SND_SOC_WM9705 if SND_SOC_AC97_BUS select SND_SOC_WM9712 if SND_SOC_AC97_BUS @@ -141,6 +142,9 @@ config SND_SOC_WM8903 config SND_SOC_WM8971 tristate +config SND_SOC_WM8988 + tristate + config SND_SOC_WM8990 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 030d245..a72548d 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -24,6 +24,7 @@ snd-soc-wm8753-objs := wm8753.o snd-soc-wm8900-objs := wm8900.o snd-soc-wm8903-objs := wm8903.o snd-soc-wm8971-objs := wm8971.o +snd-soc-wm8988-objs := wm8988.o snd-soc-wm8990-objs := wm8990.o snd-soc-wm9705-objs := wm9705.o snd-soc-wm9712-objs := wm9712.o @@ -55,6 +56,7 @@ obj-$(CONFIG_SND_SOC_WM8753) += snd-soc-wm8753.o obj-$(CONFIG_SND_SOC_WM8900) += snd-soc-wm8900.o obj-$(CONFIG_SND_SOC_WM8903) += snd-soc-wm8903.o obj-$(CONFIG_SND_SOC_WM8971) += snd-soc-wm8971.o +obj-$(CONFIG_SND_SOC_WM8988) += snd-soc-wm8988.o obj-$(CONFIG_SND_SOC_WM8990) += snd-soc-wm8990.o obj-$(CONFIG_SND_SOC_WM8991) += snd-soc-wm8991.o obj-$(CONFIG_SND_SOC_WM9705) += snd-soc-wm9705.o diff --git a/sound/soc/codecs/wm8988.c b/sound/soc/codecs/wm8988.c new file mode 100644 index 0000000..c05f718 --- /dev/null +++ b/sound/soc/codecs/wm8988.c @@ -0,0 +1,1097 @@ +/* + * wm8988.c -- WM8988 ALSA SoC audio driver + * + * Copyright 2009 Wolfson Microelectronics plc + * Copyright 2005 Openedhand Ltd. + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "wm8988.h" + +/* + * wm8988 register cache + * We can't read the WM8988 register space when we + * are using 2 wire for device control, so we cache them instead. + */ +static const u16 wm8988_reg[] = { + 0x0097, 0x0097, 0x0079, 0x0079, /* 0 */ + 0x0000, 0x0008, 0x0000, 0x000a, /* 4 */ + 0x0000, 0x0000, 0x00ff, 0x00ff, /* 8 */ + 0x000f, 0x000f, 0x0000, 0x0000, /* 12 */ + 0x0000, 0x007b, 0x0000, 0x0032, /* 16 */ + 0x0000, 0x00c3, 0x00c3, 0x00c0, /* 20 */ + 0x0000, 0x0000, 0x0000, 0x0000, /* 24 */ + 0x0000, 0x0000, 0x0000, 0x0000, /* 28 */ + 0x0000, 0x0000, 0x0050, 0x0050, /* 32 */ + 0x0050, 0x0050, 0x0050, 0x0050, /* 36 */ + 0x0079, 0x0079, 0x0079, /* 40 */ +}; + +/* codec private data */ +struct wm8988_priv { + unsigned int sysclk; + struct snd_soc_codec codec; + struct snd_pcm_hw_constraint_list *sysclk_constraints; + u16 reg_cache[WM8988_NUM_REG]; +}; + + +/* + * read wm8988 register cache + */ +static inline unsigned int wm8988_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + u16 *cache = codec->reg_cache; + if (reg > WM8988_NUM_REG) + return -1; + return cache[reg]; +} + +/* + * write wm8988 register cache + */ +static inline void wm8988_write_reg_cache(struct snd_soc_codec *codec, + unsigned int reg, unsigned int value) +{ + u16 *cache = codec->reg_cache; + if (reg > WM8988_NUM_REG) + return; + cache[reg] = value; +} + +static int wm8988_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + u8 data[2]; + + /* data is + * D15..D9 WM8753 register offset + * D8...D0 register data + */ + data[0] = (reg << 1) | ((value >> 8) & 0x0001); + data[1] = value & 0x00ff; + + wm8988_write_reg_cache(codec, reg, value); + if (codec->hw_write(codec->control_data, data, 2) == 2) + return 0; + else + return -EIO; +} + +#define wm8988_reset(c) wm8988_write(c, WM8988_RESET, 0) + +/* + * WM8988 Controls + */ + +static const char *bass_boost_txt[] = {"Linear Control", "Adaptive Boost"}; +static const struct soc_enum bass_boost = + SOC_ENUM_SINGLE(WM8988_BASS, 7, 2, bass_boost_txt); + +static const char *bass_filter_txt[] = { "130Hz @ 48kHz", "200Hz @ 48kHz" }; +static const struct soc_enum bass_filter = + SOC_ENUM_SINGLE(WM8988_BASS, 6, 2, bass_filter_txt); + +static const char *treble_txt[] = {"8kHz", "4kHz"}; +static const struct soc_enum treble = + SOC_ENUM_SINGLE(WM8988_TREBLE, 6, 2, treble_txt); + +static const char *stereo_3d_lc_txt[] = {"200Hz", "500Hz"}; +static const struct soc_enum stereo_3d_lc = + SOC_ENUM_SINGLE(WM8988_3D, 5, 2, stereo_3d_lc_txt); + +static const char *stereo_3d_uc_txt[] = {"2.2kHz", "1.5kHz"}; +static const struct soc_enum stereo_3d_uc = + SOC_ENUM_SINGLE(WM8988_3D, 6, 2, stereo_3d_uc_txt); + +static const char *stereo_3d_func_txt[] = {"Capture", "Playback"}; +static const struct soc_enum stereo_3d_func = + SOC_ENUM_SINGLE(WM8988_3D, 7, 2, stereo_3d_func_txt); + +static const char *alc_func_txt[] = {"Off", "Right", "Left", "Stereo"}; +static const struct soc_enum alc_func = + SOC_ENUM_SINGLE(WM8988_ALC1, 7, 4, alc_func_txt); + +static const char *ng_type_txt[] = {"Constant PGA Gain", + "Mute ADC Output"}; +static const struct soc_enum ng_type = + SOC_ENUM_SINGLE(WM8988_NGATE, 1, 2, ng_type_txt); + +static const char *deemph_txt[] = {"None", "32Khz", "44.1Khz", "48Khz"}; +static const struct soc_enum deemph = + SOC_ENUM_SINGLE(WM8988_ADCDAC, 1, 4, deemph_txt); + +static const char *adcpol_txt[] = {"Normal", "L Invert", "R Invert", + "L + R Invert"}; +static const struct soc_enum adcpol = + SOC_ENUM_SINGLE(WM8988_ADCDAC, 5, 4, adcpol_txt); + +static const DECLARE_TLV_DB_SCALE(pga_tlv, -1725, 75, 0); +static const DECLARE_TLV_DB_SCALE(adc_tlv, -9750, 50, 1); +static const DECLARE_TLV_DB_SCALE(dac_tlv, -12750, 50, 1); +static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1); +static const DECLARE_TLV_DB_SCALE(bypass_tlv, -1500, 300, 0); + +static const struct snd_kcontrol_new wm8988_snd_controls[] = { + +SOC_ENUM("Bass Boost", bass_boost), +SOC_ENUM("Bass Filter", bass_filter), +SOC_SINGLE("Bass Volume", WM8988_BASS, 0, 15, 1), + +SOC_SINGLE("Treble Volume", WM8988_TREBLE, 0, 15, 0), +SOC_ENUM("Treble Cut-off", treble), + +SOC_SINGLE("3D Switch", WM8988_3D, 0, 1, 0), +SOC_SINGLE("3D Volume", WM8988_3D, 1, 15, 0), +SOC_ENUM("3D Lower Cut-off", stereo_3d_lc), +SOC_ENUM("3D Upper Cut-off", stereo_3d_uc), +SOC_ENUM("3D Mode", stereo_3d_func), + +SOC_SINGLE("ALC Capture Target Volume", WM8988_ALC1, 0, 7, 0), +SOC_SINGLE("ALC Capture Max Volume", WM8988_ALC1, 4, 7, 0), +SOC_ENUM("ALC Capture Function", alc_func), +SOC_SINGLE("ALC Capture ZC Switch", WM8988_ALC2, 7, 1, 0), +SOC_SINGLE("ALC Capture Hold Time", WM8988_ALC2, 0, 15, 0), +SOC_SINGLE("ALC Capture Decay Time", WM8988_ALC3, 4, 15, 0), +SOC_SINGLE("ALC Capture Attack Time", WM8988_ALC3, 0, 15, 0), +SOC_SINGLE("ALC Capture NG Threshold", WM8988_NGATE, 3, 31, 0), +SOC_ENUM("ALC Capture NG Type", ng_type), +SOC_SINGLE("ALC Capture NG Switch", WM8988_NGATE, 0, 1, 0), + +SOC_SINGLE("ZC Timeout Switch", WM8988_ADCTL1, 0, 1, 0), + +SOC_DOUBLE_R_TLV("Capture Digital Volume", WM8988_LADC, WM8988_RADC, + 0, 255, 0, adc_tlv), +SOC_DOUBLE_R_TLV("Capture Volume", WM8988_LINVOL, WM8988_RINVOL, + 0, 63, 0, pga_tlv), +SOC_DOUBLE_R("Capture ZC Switch", WM8988_LINVOL, WM8988_RINVOL, 6, 1, 0), +SOC_DOUBLE_R("Capture Switch", WM8988_LINVOL, WM8988_RINVOL, 7, 1, 1), + +SOC_ENUM("Playback De-emphasis", deemph), + +SOC_ENUM("Capture Polarity", adcpol), +SOC_SINGLE("Playback 6dB Attenuate", WM8988_ADCDAC, 7, 1, 0), +SOC_SINGLE("Capture 6dB Attenuate", WM8988_ADCDAC, 8, 1, 0), + +SOC_DOUBLE_R_TLV("PCM Volume", WM8988_LDAC, WM8988_RDAC, 0, 255, 0, dac_tlv), + +SOC_SINGLE_TLV("Left Mixer Left Bypass Volume", WM8988_LOUTM1, 4, 7, 1, + bypass_tlv), +SOC_SINGLE_TLV("Left Mixer Right Bypass Volume", WM8988_LOUTM2, 4, 7, 1, + bypass_tlv), +SOC_SINGLE_TLV("Right Mixer Left Bypass Volume", WM8988_ROUTM1, 4, 7, 1, + bypass_tlv), +SOC_SINGLE_TLV("Right Mixer Right Bypass Volume", WM8988_ROUTM2, 4, 7, 1, + bypass_tlv), + +SOC_DOUBLE_R("Output 1 Playback ZC Switch", WM8988_LOUT1V, + WM8988_ROUT1V, 7, 1, 0), +SOC_DOUBLE_R_TLV("Output 1 Playback Volume", WM8988_LOUT1V, WM8988_ROUT1V, + 0, 127, 0, out_tlv), + +SOC_DOUBLE_R("Output 2 Playback ZC Switch", WM8988_LOUT2V, + WM8988_ROUT2V, 7, 1, 0), +SOC_DOUBLE_R_TLV("Output 2 Playback Volume", WM8988_LOUT2V, WM8988_ROUT2V, + 0, 127, 0, out_tlv), + +}; + +/* + * DAPM Controls + */ + +static int wm8988_lrc_control(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + u16 adctl2 = wm8988_read_reg_cache(codec, WM8988_ADCTL2); + + /* Use the DAC to gate LRC if active, otherwise use ADC */ + if (wm8988_read_reg_cache(codec, WM8988_PWR2) & 0x180) + adctl2 &= ~0x4; + else + adctl2 |= 0x4; + + return wm8988_write(codec, WM8988_ADCTL2, adctl2); +} + +static const char *wm8988_line_texts[] = { + "Line 1", "Line 2", "PGA", "Differential"}; + +static const unsigned int wm8988_line_values[] = { + 0, 1, 3, 4}; + +static const struct soc_enum wm8988_lline_enum = + SOC_VALUE_ENUM_SINGLE(WM8988_LOUTM1, 0, 7, + ARRAY_SIZE(wm8988_line_texts), + wm8988_line_texts, + wm8988_line_values); +static const struct snd_kcontrol_new wm8988_left_line_controls = + SOC_DAPM_VALUE_ENUM("Route", wm8988_lline_enum); + +static const struct soc_enum wm8988_rline_enum = + SOC_VALUE_ENUM_SINGLE(WM8988_ROUTM1, 0, 7, + ARRAY_SIZE(wm8988_line_texts), + wm8988_line_texts, + wm8988_line_values); +static const struct snd_kcontrol_new wm8988_right_line_controls = + SOC_DAPM_VALUE_ENUM("Route", wm8988_lline_enum); + +/* Left Mixer */ +static const struct snd_kcontrol_new wm8988_left_mixer_controls[] = { + SOC_DAPM_SINGLE("Playback Switch", WM8988_LOUTM1, 8, 1, 0), + SOC_DAPM_SINGLE("Left Bypass Switch", WM8988_LOUTM1, 7, 1, 0), + SOC_DAPM_SINGLE("Right Playback Switch", WM8988_LOUTM2, 8, 1, 0), + SOC_DAPM_SINGLE("Right Bypass Switch", WM8988_LOUTM2, 7, 1, 0), +}; + +/* Right Mixer */ +static const struct snd_kcontrol_new wm8988_right_mixer_controls[] = { + SOC_DAPM_SINGLE("Left Playback Switch", WM8988_ROUTM1, 8, 1, 0), + SOC_DAPM_SINGLE("Left Bypass Switch", WM8988_ROUTM1, 7, 1, 0), + SOC_DAPM_SINGLE("Playback Switch", WM8988_ROUTM2, 8, 1, 0), + SOC_DAPM_SINGLE("Right Bypass Switch", WM8988_ROUTM2, 7, 1, 0), +}; + +static const char *wm8988_pga_sel[] = {"Line 1", "Line 2", "Differential"}; +static const unsigned int wm8988_pga_val[] = { 0, 1, 3 }; + +/* Left PGA Mux */ +static const struct soc_enum wm8988_lpga_enum = + SOC_VALUE_ENUM_SINGLE(WM8988_LADCIN, 6, 3, + ARRAY_SIZE(wm8988_pga_sel), + wm8988_pga_sel, + wm8988_pga_val); +static const struct snd_kcontrol_new wm8988_left_pga_controls = + SOC_DAPM_VALUE_ENUM("Route", wm8988_lpga_enum); + +/* Right PGA Mux */ +static const struct soc_enum wm8988_rpga_enum = + SOC_VALUE_ENUM_SINGLE(WM8988_RADCIN, 6, 3, + ARRAY_SIZE(wm8988_pga_sel), + wm8988_pga_sel, + wm8988_pga_val); +static const struct snd_kcontrol_new wm8988_right_pga_controls = + SOC_DAPM_VALUE_ENUM("Route", wm8988_rpga_enum); + +/* Differential Mux */ +static const char *wm8988_diff_sel[] = {"Line 1", "Line 2"}; +static const struct soc_enum diffmux = + SOC_ENUM_SINGLE(WM8988_ADCIN, 8, 2, wm8988_diff_sel); +static const struct snd_kcontrol_new wm8988_diffmux_controls = + SOC_DAPM_ENUM("Route", diffmux); + +/* Mono ADC Mux */ +static const char *wm8988_mono_mux[] = {"Stereo", "Mono (Left)", + "Mono (Right)", "Digital Mono"}; +static const struct soc_enum monomux = + SOC_ENUM_SINGLE(WM8988_ADCIN, 6, 4, wm8988_mono_mux); +static const struct snd_kcontrol_new wm8988_monomux_controls = + SOC_DAPM_ENUM("Route", monomux); + +static const struct snd_soc_dapm_widget wm8988_dapm_widgets[] = { + SND_SOC_DAPM_MICBIAS("Mic Bias", WM8988_PWR1, 1, 0), + + SND_SOC_DAPM_MUX("Differential Mux", SND_SOC_NOPM, 0, 0, + &wm8988_diffmux_controls), + SND_SOC_DAPM_MUX("Left ADC Mux", SND_SOC_NOPM, 0, 0, + &wm8988_monomux_controls), + SND_SOC_DAPM_MUX("Right ADC Mux", SND_SOC_NOPM, 0, 0, + &wm8988_monomux_controls), + + SND_SOC_DAPM_MUX("Left PGA Mux", WM8988_PWR1, 5, 0, + &wm8988_left_pga_controls), + SND_SOC_DAPM_MUX("Right PGA Mux", WM8988_PWR1, 4, 0, + &wm8988_right_pga_controls), + + SND_SOC_DAPM_MUX("Left Line Mux", SND_SOC_NOPM, 0, 0, + &wm8988_left_line_controls), + SND_SOC_DAPM_MUX("Right Line Mux", SND_SOC_NOPM, 0, 0, + &wm8988_right_line_controls), + + SND_SOC_DAPM_ADC("Right ADC", "Right Capture", WM8988_PWR1, 2, 0), + SND_SOC_DAPM_ADC("Left ADC", "Left Capture", WM8988_PWR1, 3, 0), + + SND_SOC_DAPM_DAC("Right DAC", "Right Playback", WM8988_PWR2, 7, 0), + SND_SOC_DAPM_DAC("Left DAC", "Left Playback", WM8988_PWR2, 8, 0), + + SND_SOC_DAPM_MIXER("Left Mixer", SND_SOC_NOPM, 0, 0, + &wm8988_left_mixer_controls[0], + ARRAY_SIZE(wm8988_left_mixer_controls)), + SND_SOC_DAPM_MIXER("Right Mixer", SND_SOC_NOPM, 0, 0, + &wm8988_right_mixer_controls[0], + ARRAY_SIZE(wm8988_right_mixer_controls)), + + SND_SOC_DAPM_PGA("Right Out 2", WM8988_PWR2, 3, 0, NULL, 0), + SND_SOC_DAPM_PGA("Left Out 2", WM8988_PWR2, 4, 0, NULL, 0), + SND_SOC_DAPM_PGA("Right Out 1", WM8988_PWR2, 5, 0, NULL, 0), + SND_SOC_DAPM_PGA("Left Out 1", WM8988_PWR2, 6, 0, NULL, 0), + + SND_SOC_DAPM_POST("LRC control", wm8988_lrc_control), + + SND_SOC_DAPM_OUTPUT("LOUT1"), + SND_SOC_DAPM_OUTPUT("ROUT1"), + SND_SOC_DAPM_OUTPUT("LOUT2"), + SND_SOC_DAPM_OUTPUT("ROUT2"), + SND_SOC_DAPM_OUTPUT("VREF"), + + SND_SOC_DAPM_INPUT("LINPUT1"), + SND_SOC_DAPM_INPUT("LINPUT2"), + SND_SOC_DAPM_INPUT("RINPUT1"), + SND_SOC_DAPM_INPUT("RINPUT2"), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + + { "Left Line Mux", "Line 1", "LINPUT1" }, + { "Left Line Mux", "Line 2", "LINPUT2" }, + { "Left Line Mux", "PGA", "Left PGA Mux" }, + { "Left Line Mux", "Differential", "Differential Mux" }, + + { "Right Line Mux", "Line 1", "RINPUT1" }, + { "Right Line Mux", "Line 2", "RINPUT2" }, + { "Right Line Mux", "PGA", "Right PGA Mux" }, + { "Right Line Mux", "Differential", "Differential Mux" }, + + { "Left PGA Mux", "Line 1", "LINPUT1" }, + { "Left PGA Mux", "Line 2", "LINPUT2" }, + { "Left PGA Mux", "Differential", "Differential Mux" }, + + { "Right PGA Mux", "Line 1", "RINPUT1" }, + { "Right PGA Mux", "Line 2", "RINPUT2" }, + { "Right PGA Mux", "Differential", "Differential Mux" }, + + { "Differential Mux", "Line 1", "LINPUT1" }, + { "Differential Mux", "Line 1", "RINPUT1" }, + { "Differential Mux", "Line 2", "LINPUT2" }, + { "Differential Mux", "Line 2", "RINPUT2" }, + + { "Left ADC Mux", "Stereo", "Left PGA Mux" }, + { "Left ADC Mux", "Mono (Left)", "Left PGA Mux" }, + { "Left ADC Mux", "Digital Mono", "Left PGA Mux" }, + + { "Right ADC Mux", "Stereo", "Right PGA Mux" }, + { "Right ADC Mux", "Mono (Right)", "Right PGA Mux" }, + { "Right ADC Mux", "Digital Mono", "Right PGA Mux" }, + + { "Left ADC", NULL, "Left ADC Mux" }, + { "Right ADC", NULL, "Right ADC Mux" }, + + { "Left Line Mux", "Line 1", "LINPUT1" }, + { "Left Line Mux", "Line 2", "LINPUT2" }, + { "Left Line Mux", "PGA", "Left PGA Mux" }, + { "Left Line Mux", "Differential", "Differential Mux" }, + + { "Right Line Mux", "Line 1", "RINPUT1" }, + { "Right Line Mux", "Line 2", "RINPUT2" }, + { "Right Line Mux", "PGA", "Right PGA Mux" }, + { "Right Line Mux", "Differential", "Differential Mux" }, + + { "Left Mixer", "Playback Switch", "Left DAC" }, + { "Left Mixer", "Left Bypass Switch", "Left Line Mux" }, + { "Left Mixer", "Right Playback Switch", "Right DAC" }, + { "Left Mixer", "Right Bypass Switch", "Right Line Mux" }, + + { "Right Mixer", "Left Playback Switch", "Left DAC" }, + { "Right Mixer", "Left Bypass Switch", "Left Line Mux" }, + { "Right Mixer", "Playback Switch", "Right DAC" }, + { "Right Mixer", "Right Bypass Switch", "Right Line Mux" }, + + { "Left Out 1", NULL, "Left Mixer" }, + { "LOUT1", NULL, "Left Out 1" }, + { "Right Out 1", NULL, "Right Mixer" }, + { "ROUT1", NULL, "Right Out 1" }, + + { "Left Out 2", NULL, "Left Mixer" }, + { "LOUT2", NULL, "Left Out 2" }, + { "Right Out 2", NULL, "Right Mixer" }, + { "ROUT2", NULL, "Right Out 2" }, +}; + +struct _coeff_div { + u32 mclk; + u32 rate; + u16 fs; + u8 sr:5; + u8 usb:1; +}; + +/* codec hifi mclk clock divider coefficients */ +static const struct _coeff_div coeff_div[] = { + /* 8k */ + {12288000, 8000, 1536, 0x6, 0x0}, + {11289600, 8000, 1408, 0x16, 0x0}, + {18432000, 8000, 2304, 0x7, 0x0}, + {16934400, 8000, 2112, 0x17, 0x0}, + {12000000, 8000, 1500, 0x6, 0x1}, + + /* 11.025k */ + {11289600, 11025, 1024, 0x18, 0x0}, + {16934400, 11025, 1536, 0x19, 0x0}, + {12000000, 11025, 1088, 0x19, 0x1}, + + /* 16k */ + {12288000, 16000, 768, 0xa, 0x0}, + {18432000, 16000, 1152, 0xb, 0x0}, + {12000000, 16000, 750, 0xa, 0x1}, + + /* 22.05k */ + {11289600, 22050, 512, 0x1a, 0x0}, + {16934400, 22050, 768, 0x1b, 0x0}, + {12000000, 22050, 544, 0x1b, 0x1}, + + /* 32k */ + {12288000, 32000, 384, 0xc, 0x0}, + {18432000, 32000, 576, 0xd, 0x0}, + {12000000, 32000, 375, 0xa, 0x1}, + + /* 44.1k */ + {11289600, 44100, 256, 0x10, 0x0}, + {16934400, 44100, 384, 0x11, 0x0}, + {12000000, 44100, 272, 0x11, 0x1}, + + /* 48k */ + {12288000, 48000, 256, 0x0, 0x0}, + {18432000, 48000, 384, 0x1, 0x0}, + {12000000, 48000, 250, 0x0, 0x1}, + + /* 88.2k */ + {11289600, 88200, 128, 0x1e, 0x0}, + {16934400, 88200, 192, 0x1f, 0x0}, + {12000000, 88200, 136, 0x1f, 0x1}, + + /* 96k */ + {12288000, 96000, 128, 0xe, 0x0}, + {18432000, 96000, 192, 0xf, 0x0}, + {12000000, 96000, 125, 0xe, 0x1}, +}; + +static inline int get_coeff(int mclk, int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(coeff_div); i++) { + if (coeff_div[i].rate == rate && coeff_div[i].mclk == mclk) + return i; + } + + return -EINVAL; +} + +/* The set of rates we can generate from the above for each SYSCLK */ + +static unsigned int rates_12288[] = { + 8000, 12000, 16000, 24000, 24000, 32000, 48000, 96000, +}; + +static struct snd_pcm_hw_constraint_list constraints_12288 = { + .count = ARRAY_SIZE(rates_12288), + .list = rates_12288, +}; + +static unsigned int rates_112896[] = { + 8000, 11025, 22050, 44100, +}; + +static struct snd_pcm_hw_constraint_list constraints_112896 = { + .count = ARRAY_SIZE(rates_112896), + .list = rates_112896, +}; + +static unsigned int rates_12[] = { + 8000, 11025, 12000, 16000, 22050, 2400, 32000, 41100, 48000, + 48000, 88235, 96000, +}; + +static struct snd_pcm_hw_constraint_list constraints_12 = { + .count = ARRAY_SIZE(rates_12), + .list = rates_12, +}; + +/* + * Note that this should be called from init rather than from hw_params. + */ +static int wm8988_set_dai_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct wm8988_priv *wm8988 = codec->private_data; + + switch (freq) { + case 11289600: + case 18432000: + case 22579200: + case 36864000: + wm8988->sysclk_constraints = &constraints_112896; + wm8988->sysclk = freq; + return 0; + + case 12288000: + case 16934400: + case 24576000: + case 33868800: + wm8988->sysclk_constraints = &constraints_12288; + wm8988->sysclk = freq; + return 0; + + case 12000000: + case 24000000: + wm8988->sysclk_constraints = &constraints_12; + wm8988->sysclk = freq; + return 0; + } + return -EINVAL; +} + +static int wm8988_set_dai_fmt(struct snd_soc_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 iface = 0; + + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + iface = 0x0040; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } + + /* interface format */ + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + iface |= 0x0002; + break; + case SND_SOC_DAIFMT_RIGHT_J: + break; + case SND_SOC_DAIFMT_LEFT_J: + iface |= 0x0001; + break; + case SND_SOC_DAIFMT_DSP_A: + iface |= 0x0003; + break; + case SND_SOC_DAIFMT_DSP_B: + iface |= 0x0013; + break; + default: + return -EINVAL; + } + + /* clock inversion */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_IF: + iface |= 0x0090; + break; + case SND_SOC_DAIFMT_IB_NF: + iface |= 0x0080; + break; + case SND_SOC_DAIFMT_NB_IF: + iface |= 0x0010; + break; + default: + return -EINVAL; + } + + wm8988_write(codec, WM8988_IFACE, iface); + return 0; +} + +static int wm8988_pcm_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_codec *codec = dai->codec; + struct wm8988_priv *wm8988 = codec->private_data; + + /* The set of sample rates that can be supported depends on the + * MCLK supplied to the CODEC - enforce this. + */ + if (!wm8988->sysclk) { + dev_err(codec->dev, + "No MCLK configured, call set_sysclk() on init\n"); + return -EINVAL; + } + + snd_pcm_hw_constraint_list(substream->runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, + wm8988->sysclk_constraints); + + return 0; +} + +static int wm8988_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + struct wm8988_priv *wm8988 = codec->private_data; + u16 iface = wm8988_read_reg_cache(codec, WM8988_IFACE) & 0x1f3; + u16 srate = wm8988_read_reg_cache(codec, WM8988_SRATE) & 0x180; + int coeff; + + coeff = get_coeff(wm8988->sysclk, params_rate(params)); + if (coeff < 0) { + coeff = get_coeff(wm8988->sysclk / 2, params_rate(params)); + srate |= 0x40; + } + if (coeff < 0) { + dev_err(codec->dev, + "Unable to configure sample rate %dHz with %dHz MCLK\n", + params_rate(params), wm8988->sysclk); + return coeff; + } + + /* bit size */ + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + break; + case SNDRV_PCM_FORMAT_S20_3LE: + iface |= 0x0004; + break; + case SNDRV_PCM_FORMAT_S24_LE: + iface |= 0x0008; + break; + case SNDRV_PCM_FORMAT_S32_LE: + iface |= 0x000c; + break; + } + + /* set iface & srate */ + wm8988_write(codec, WM8988_IFACE, iface); + if (coeff >= 0) + wm8988_write(codec, WM8988_SRATE, srate | + (coeff_div[coeff].sr << 1) | coeff_div[coeff].usb); + + return 0; +} + +static int wm8988_mute(struct snd_soc_dai *dai, int mute) +{ + struct snd_soc_codec *codec = dai->codec; + u16 mute_reg = wm8988_read_reg_cache(codec, WM8988_ADCDAC) & 0xfff7; + + if (mute) + wm8988_write(codec, WM8988_ADCDAC, mute_reg | 0x8); + else + wm8988_write(codec, WM8988_ADCDAC, mute_reg); + return 0; +} + +static int wm8988_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + u16 pwr_reg = wm8988_read_reg_cache(codec, WM8988_PWR1) & ~0x1c1; + + switch (level) { + case SND_SOC_BIAS_ON: + break; + + case SND_SOC_BIAS_PREPARE: + /* VREF, VMID=2x50k, digital enabled */ + wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x00c0); + break; + + case SND_SOC_BIAS_STANDBY: + if (codec->bias_level == SND_SOC_BIAS_OFF) { + /* VREF, VMID=2x5k */ + wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x1c1); + + /* Charge caps */ + msleep(100); + } + + /* VREF, VMID=2*500k, digital stopped */ + wm8988_write(codec, WM8988_PWR1, pwr_reg | 0x0141); + break; + + case SND_SOC_BIAS_OFF: + wm8988_write(codec, WM8988_PWR1, 0x0000); + break; + } + codec->bias_level = level; + return 0; +} + +#define WM8988_RATES SNDRV_PCM_RATE_8000_96000 + +#define WM8988_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE |\ + SNDRV_PCM_FMTBIT_S24_LE) + +static struct snd_soc_dai_ops wm8988_ops = { + .startup = wm8988_pcm_startup, + .hw_params = wm8988_pcm_hw_params, + .set_fmt = wm8988_set_dai_fmt, + .set_sysclk = wm8988_set_dai_sysclk, + .digital_mute = wm8988_mute, +}; + +struct snd_soc_dai wm8988_dai = { + .name = "WM8988", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = WM8988_RATES, + .formats = WM8988_FORMATS, + }, + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = WM8988_RATES, + .formats = WM8988_FORMATS, + }, + .ops = &wm8988_ops, + .symmetric_rates = 1, +}; +EXPORT_SYMBOL_GPL(wm8988_dai); + +static int wm8988_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + + wm8988_set_bias_level(codec, SND_SOC_BIAS_OFF); + return 0; +} + +static int wm8988_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + int i; + u8 data[2]; + u16 *cache = codec->reg_cache; + + /* Sync reg_cache with the hardware */ + for (i = 0; i < WM8988_NUM_REG; i++) { + if (i == WM8988_RESET) + continue; + data[0] = (i << 1) | ((cache[i] >> 8) & 0x0001); + data[1] = cache[i] & 0x00ff; + codec->hw_write(codec->control_data, data, 2); + } + + wm8988_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + return 0; +} + +static struct snd_soc_codec *wm8988_codec; + +static int wm8988_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec; + int ret = 0; + + if (wm8988_codec == NULL) { + dev_err(&pdev->dev, "Codec device not registered\n"); + return -ENODEV; + } + + socdev->card->codec = wm8988_codec; + codec = wm8988_codec; + + /* register pcms */ + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + dev_err(codec->dev, "failed to create pcms: %d\n", ret); + goto pcm_err; + } + + snd_soc_add_controls(codec, wm8988_snd_controls, + ARRAY_SIZE(wm8988_snd_controls)); + snd_soc_dapm_new_controls(codec, wm8988_dapm_widgets, + ARRAY_SIZE(wm8988_dapm_widgets)); + snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); + snd_soc_dapm_new_widgets(codec); + + ret = snd_soc_init_card(socdev); + if (ret < 0) { + dev_err(codec->dev, "failed to register card: %d\n", ret); + goto card_err; + } + + return ret; + +card_err: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +pcm_err: + return ret; +} + +static int wm8988_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); + + return 0; +} + +struct snd_soc_codec_device soc_codec_dev_wm8988 = { + .probe = wm8988_probe, + .remove = wm8988_remove, + .suspend = wm8988_suspend, + .resume = wm8988_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_wm8988); + +static int wm8988_register(struct wm8988_priv *wm8988) +{ + struct snd_soc_codec *codec = &wm8988->codec; + int ret; + u16 reg; + + if (wm8988_codec) { + dev_err(codec->dev, "Another WM8988 is registered\n"); + ret = -EINVAL; + goto err; + } + + mutex_init(&codec->mutex); + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + + codec->private_data = wm8988; + codec->name = "WM8988"; + codec->owner = THIS_MODULE; + codec->read = wm8988_read_reg_cache; + codec->write = wm8988_write; + codec->dai = &wm8988_dai; + codec->num_dai = 1; + codec->reg_cache_size = ARRAY_SIZE(wm8988->reg_cache); + codec->reg_cache = &wm8988->reg_cache; + codec->bias_level = SND_SOC_BIAS_OFF; + codec->set_bias_level = wm8988_set_bias_level; + + memcpy(codec->reg_cache, wm8988_reg, + sizeof(wm8988_reg)); + + ret = wm8988_reset(codec); + if (ret < 0) { + dev_err(codec->dev, "Failed to issue reset\n"); + return ret; + } + + /* set the update bits (we always update left then right) */ + reg = wm8988_read_reg_cache(codec, WM8988_RADC); + wm8988_write(codec, WM8988_RADC, reg | 0x100); + reg = wm8988_read_reg_cache(codec, WM8988_RDAC); + wm8988_write(codec, WM8988_RDAC, reg | 0x0100); + reg = wm8988_read_reg_cache(codec, WM8988_ROUT1V); + wm8988_write(codec, WM8988_ROUT1V, reg | 0x0100); + reg = wm8988_read_reg_cache(codec, WM8988_ROUT2V); + wm8988_write(codec, WM8988_ROUT2V, reg | 0x0100); + reg = wm8988_read_reg_cache(codec, WM8988_RINVOL); + wm8988_write(codec, WM8988_RINVOL, reg | 0x0100); + + wm8988_set_bias_level(&wm8988->codec, SND_SOC_BIAS_STANDBY); + + wm8988_dai.dev = codec->dev; + + wm8988_codec = codec; + + ret = snd_soc_register_codec(codec); + if (ret != 0) { + dev_err(codec->dev, "Failed to register codec: %d\n", ret); + return ret; + } + + ret = snd_soc_register_dai(&wm8988_dai); + if (ret != 0) { + dev_err(codec->dev, "Failed to register DAI: %d\n", ret); + snd_soc_unregister_codec(codec); + return ret; + } + + return 0; + +err: + kfree(wm8988); + return ret; +} + +static void wm8988_unregister(struct wm8988_priv *wm8988) +{ + wm8988_set_bias_level(&wm8988->codec, SND_SOC_BIAS_OFF); + snd_soc_unregister_dai(&wm8988_dai); + snd_soc_unregister_codec(&wm8988->codec); + kfree(wm8988); + wm8988_codec = NULL; +} + +#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) +static int wm8988_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct wm8988_priv *wm8988; + struct snd_soc_codec *codec; + + wm8988 = kzalloc(sizeof(struct wm8988_priv), GFP_KERNEL); + if (wm8988 == NULL) + return -ENOMEM; + + codec = &wm8988->codec; + codec->hw_write = (hw_write_t)i2c_master_send; + + i2c_set_clientdata(i2c, wm8988); + codec->control_data = i2c; + + codec->dev = &i2c->dev; + + return wm8988_register(wm8988); +} + +static int wm8988_i2c_remove(struct i2c_client *client) +{ + struct wm8988_priv *wm8988 = i2c_get_clientdata(client); + wm8988_unregister(wm8988); + return 0; +} + +static const struct i2c_device_id wm8988_i2c_id[] = { + { "wm8988", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wm8988_i2c_id); + +static struct i2c_driver wm8988_i2c_driver = { + .driver = { + .name = "WM8988", + .owner = THIS_MODULE, + }, + .probe = wm8988_i2c_probe, + .remove = wm8988_i2c_remove, + .id_table = wm8988_i2c_id, +}; +#endif + +#if defined(CONFIG_SPI_MASTER) +static int wm8988_spi_write(struct spi_device *spi, const char *data, int len) +{ + struct spi_transfer t; + struct spi_message m; + u8 msg[2]; + + if (len <= 0) + return 0; + + msg[0] = data[0]; + msg[1] = data[1]; + + spi_message_init(&m); + memset(&t, 0, (sizeof t)); + + t.tx_buf = &msg[0]; + t.len = len; + + spi_message_add_tail(&t, &m); + spi_sync(spi, &m); + + return len; +} + +static int __devinit wm8988_spi_probe(struct spi_device *spi) +{ + struct wm8988_priv *wm8988; + struct snd_soc_codec *codec; + + wm8988 = kzalloc(sizeof(struct wm8988_priv), GFP_KERNEL); + if (wm8988 == NULL) + return -ENOMEM; + + codec = &wm8988->codec; + codec->hw_write = (hw_write_t)wm8988_spi_write; + codec->control_data = spi; + codec->dev = &spi->dev; + + spi->dev.driver_data = wm8988; + + return wm8988_register(wm8988); +} + +static int __devexit wm8988_spi_remove(struct spi_device *spi) +{ + struct wm8988_priv *wm8988 = spi->dev.driver_data; + + wm8988_unregister(wm8988); + + return 0; +} + +static struct spi_driver wm8988_spi_driver = { + .driver = { + .name = "wm8988", + .bus = &spi_bus_type, + .owner = THIS_MODULE, + }, + .probe = wm8988_spi_probe, + .remove = __devexit_p(wm8988_spi_remove), +}; +#endif + +static int __init wm8988_modinit(void) +{ + int ret; + +#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) + ret = i2c_add_driver(&wm8988_i2c_driver); + if (ret != 0) + pr_err("WM8988: Unable to register I2C driver: %d\n", ret); +#endif +#if defined(CONFIG_SPI_MASTER) + ret = spi_register_driver(&wm8988_spi_driver); + if (ret != 0) + pr_err("WM8988: Unable to register SPI driver: %d\n", ret); +#endif + return ret; +} +module_init(wm8988_modinit); + +static void __exit wm8988_exit(void) +{ +#if defined(CONFIG_I2C) || defined(CONFIG_I2C_MODULE) + i2c_del_driver(&wm8988_i2c_driver); +#endif +#if defined(CONFIG_SPI_MASTER) + spi_unregister_driver(&wm8988_spi_driver); +#endif +} +module_exit(wm8988_exit); + + +MODULE_DESCRIPTION("ASoC WM8988 driver"); +MODULE_AUTHOR("Mark Brown "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/wm8988.h b/sound/soc/codecs/wm8988.h new file mode 100644 index 0000000..4552d37 --- /dev/null +++ b/sound/soc/codecs/wm8988.h @@ -0,0 +1,60 @@ +/* + * Copyright 2005 Openedhand Ltd. + * + * Author: Richard Purdie + * + * Based on WM8753.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef _WM8988_H +#define _WM8988_H + +/* WM8988 register space */ + +#define WM8988_LINVOL 0x00 +#define WM8988_RINVOL 0x01 +#define WM8988_LOUT1V 0x02 +#define WM8988_ROUT1V 0x03 +#define WM8988_ADCDAC 0x05 +#define WM8988_IFACE 0x07 +#define WM8988_SRATE 0x08 +#define WM8988_LDAC 0x0a +#define WM8988_RDAC 0x0b +#define WM8988_BASS 0x0c +#define WM8988_TREBLE 0x0d +#define WM8988_RESET 0x0f +#define WM8988_3D 0x10 +#define WM8988_ALC1 0x11 +#define WM8988_ALC2 0x12 +#define WM8988_ALC3 0x13 +#define WM8988_NGATE 0x14 +#define WM8988_LADC 0x15 +#define WM8988_RADC 0x16 +#define WM8988_ADCTL1 0x17 +#define WM8988_ADCTL2 0x18 +#define WM8988_PWR1 0x19 +#define WM8988_PWR2 0x1a +#define WM8988_ADCTL3 0x1b +#define WM8988_ADCIN 0x1f +#define WM8988_LADCIN 0x20 +#define WM8988_RADCIN 0x21 +#define WM8988_LOUTM1 0x22 +#define WM8988_LOUTM2 0x23 +#define WM8988_ROUTM1 0x24 +#define WM8988_ROUTM2 0x25 +#define WM8988_LOUT2V 0x28 +#define WM8988_ROUT2V 0x29 +#define WM8988_LPPB 0x43 +#define WM8988_NUM_REG 0x44 + +#define WM8988_SYSCLK 0 + +extern struct snd_soc_dai wm8988_dai; +extern struct snd_soc_codec_device soc_codec_dev_wm8988; + +#endif -- cgit v0.10.2 From 169aafbc8d3f05431b5cfeb60294a12b8ef2bcee Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 7 Apr 2009 13:37:26 -0700 Subject: lguest: update lazy mmu changes to match lguest's use of kvm hypercalls Duplicate hcall -> kvm_hypercall0 convertion from "lguest: use KVM hypercalls". Signed-off-by: Jeremy Fitzhardinge Cc: Matias Zabaljauregui Cc: Rusty Russell diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5ab2397..cfb2d68 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -168,7 +168,7 @@ static void lazy_hcall3(unsigned long call, * issue the do-nothing hypercall to flush any stored calls. */ static void lguest_leave_lazy_mmu_mode(void) { - hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0); + kvm_hypercall0(LHCALL_FLUSH_ASYNC); paravirt_leave_lazy_mmu(); } -- cgit v0.10.2 From 44bc9dc729e33a4ec6ebed4d0b6c08e8d20b42cf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 10:47:17 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping, cleanup Andrew Morton noticed that mm.h needlessly includes sched.h - remove it. Reported-by: Andrew Morton Signed-off-by: Ingo Molnar diff --git a/include/linux/mm.h b/include/linux/mm.h index 64d8ed2..776b641 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -13,7 +13,6 @@ #include #include #include -#include struct mempolicy; struct anon_vma; -- cgit v0.10.2 From a34b50ddc265bae058c66661b096ef6384c5a8b1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Apr 2009 10:56:54 +0200 Subject: mm, x86, ptrace, bts: defer branch trace stopping, remove dead code Remove the unused free_locked_buffer() API. Signed-off-by: Ingo Molnar diff --git a/include/linux/mm.h b/include/linux/mm.h index 776b641..a3963ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1320,7 +1320,6 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); extern void *alloc_locked_buffer(size_t size); -extern void free_locked_buffer(void *buffer, size_t size); extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 749383b..28be15e 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -671,9 +671,3 @@ void refund_locked_buffer_memory(struct mm_struct *mm, size_t size) up_write(&mm->mmap_sem); } - -void free_locked_buffer(void *buffer, size_t size) -{ - refund_locked_buffer_memory(current->mm, size); - kfree(buffer); -} -- cgit v0.10.2 From dc66270b51a62b1a6888d5309229e638a305c47b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Apr 2009 20:30:10 +1000 Subject: perf_counter: fix powerpc build Commit 4af4998b ("perf_counter: rework context time") changed struct perf_counter_context to have a 'time' field instead of a 'time_now' field, but neglected to fix the place in the powerpc perf_counter.c where the time_now field was accessed. This fixes it. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18908.31922.411398.147810@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index f88c35d..0e56513 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -457,8 +457,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) { counter->state = PERF_COUNTER_STATE_ACTIVE; counter->oncpu = cpu; - counter->tstamp_running += counter->ctx->time_now - - counter->tstamp_stopped; + counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; if (is_software_counter(counter)) counter->hw_ops->enable(counter); } -- cgit v0.10.2 From f708223d49ac39f5af1643985056206c98033f5b Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Apr 2009 20:30:18 +1000 Subject: perf_counter: powerpc: set sample enable bit for marked instruction events Impact: enable access to hardware feature POWER processors have the ability to "mark" a subset of the instructions and provide more detailed information on what happens to the marked instructions as they flow through the pipeline. This marking is enabled by the "sample enable" bit in MMCRA, and there are synchronization requirements around setting and clearing the bit. This adds logic to the processor-specific back-ends so that they know which events relate to marked instructions and set the sampling enable bit if any event that we want to put on the PMU is a marked instruction event. It also adds logic to the generic powerpc code to do the necessary synchronization if that bit is set. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18908.31930.1024.228867@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0e56513..0697ade 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -307,6 +307,15 @@ u64 hw_perf_save_disable(void) } /* + * Disable instruction sampling if it was enabled + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mtspr(SPRN_MMCRA, + cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); + mb(); + } + + /* * Set the 'freeze counters' bit. * The barrier is to make sure the mtspr has been * executed and the PMU has frozen the counters @@ -347,12 +356,11 @@ void hw_perf_restore(u64 disable) * (possibly updated for removal of counters). */ if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); if (cpuhw->n_counters == 0) get_lppaca()->pmcregs_in_use = 0; - goto out; + goto out_enable; } /* @@ -385,7 +393,7 @@ void hw_perf_restore(u64 disable) * Then unfreeze the counters. */ get_lppaca()->pmcregs_in_use = 1; - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) | MMCR0_FC); @@ -421,10 +429,20 @@ void hw_perf_restore(u64 disable) write_pmc(counter->hw.idx, val); perf_counter_update_userpage(counter); } - mb(); cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; + + out_enable: + mb(); mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + /* + * Enable instruction sampling if necessary + */ + if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { + mb(); + mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); + } + out: local_irq_restore(flags); } diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index cec21ea..1222c8e 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -1,5 +1,5 @@ /* - * Performance counter support for POWER5 (not POWER5++) processors. + * Performance counter support for POWER5+/++ (not POWER5) processors. * * Copyright 2009 Paul Mackerras, IBM Corporation. * @@ -281,10 +281,107 @@ static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) return nalt; } +/* + * Map of which direct events on which PMCs are marked instruction events. + * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. + * Bit 0 is set if it is marked for all PMCs. + * The 0x80 bit indicates a byte decode PMCSEL value. + */ +static unsigned char direct_event_is_marked[0x28] = { + 0, /* 00 */ + 0x1f, /* 01 PM_IOPS_CMPL */ + 0x2, /* 02 PM_MRK_GRP_DISP */ + 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ + 0, /* 04 */ + 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ + 0x80, /* 06 */ + 0x80, /* 07 */ + 0, 0, 0,/* 08 - 0a */ + 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ + 0, /* 0c */ + 0x80, /* 0d */ + 0x80, /* 0e */ + 0, /* 0f */ + 0, /* 10 */ + 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ + 0, /* 12 */ + 0x10, /* 13 PM_MRK_GRP_CMPL */ + 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ + 0x2, /* 15 PM_MRK_GRP_ISSUED */ + 0x80, /* 16 */ + 0x80, /* 17 */ + 0, 0, 0, 0, 0, + 0x80, /* 1d */ + 0x80, /* 1e */ + 0, /* 1f */ + 0x80, /* 20 */ + 0x80, /* 21 */ + 0x80, /* 22 */ + 0x80, /* 23 */ + 0x80, /* 24 */ + 0x80, /* 25 */ + 0x80, /* 26 */ + 0x80, /* 27 */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int power5p_marked_instr_event(unsigned int event) +{ + int pmc, psel; + int bit, byte, unit; + u32 mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc >= 5) + return 0; + + bit = -1; + if (psel < sizeof(direct_event_is_marked)) { + if (direct_event_is_marked[psel] & (1 << pmc)) + return 1; + if (direct_event_is_marked[psel] & 0x80) + bit = 4; + else if (psel == 0x08) + bit = pmc - 1; + else if (psel == 0x10) + bit = 4 - pmc; + else if (psel == 0x1b && (pmc == 1 || pmc == 3)) + bit = 4; + } else if ((psel & 0x48) == 0x40) { + bit = psel & 7; + } else if (psel == 0x28) { + bit = pmc - 1; + } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { + bit = 4; + } + + if (!(event & PM_BUSEVENT_MSK) || bit == -1) + return 0; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit == PM_LSU0) { + /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ + mask = 0x5dff00; + } else if (unit == PM_LSU1 && byte >= 4) { + byte -= 4; + /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ + mask = 0x5f11c000; + } else + return 0; + + return (mask >> (byte * 8 + bit)) & 1; +} + static int power5p_compute_mmcr(unsigned int event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; + u64 mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm; int i, isbus, bit, grsel; @@ -404,6 +501,8 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; mmcr1 |= (u64)grsel << grsel_shift[bit]; } + if (power5p_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) /* select alternate byte lane */ psel |= 0x10; @@ -419,7 +518,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, if (pmc_inuse & 0x3e) mmcr[0] |= MMCR0_PMCjCE; mmcr[1] = mmcr1; - mmcr[2] = 0; + mmcr[2] = mmcra; return 0; } diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 379ed10..116c4bb 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -290,10 +290,102 @@ static int power5_get_alternatives(unsigned int event, unsigned int alt[]) return nalt; } +/* + * Map of which direct events on which PMCs are marked instruction events. + * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. + * Bit 0 is set if it is marked for all PMCs. + * The 0x80 bit indicates a byte decode PMCSEL value. + */ +static unsigned char direct_event_is_marked[0x28] = { + 0, /* 00 */ + 0x1f, /* 01 PM_IOPS_CMPL */ + 0x2, /* 02 PM_MRK_GRP_DISP */ + 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ + 0, /* 04 */ + 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ + 0x80, /* 06 */ + 0x80, /* 07 */ + 0, 0, 0,/* 08 - 0a */ + 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ + 0, /* 0c */ + 0x80, /* 0d */ + 0x80, /* 0e */ + 0, /* 0f */ + 0, /* 10 */ + 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ + 0, /* 12 */ + 0x10, /* 13 PM_MRK_GRP_CMPL */ + 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ + 0x2, /* 15 PM_MRK_GRP_ISSUED */ + 0x80, /* 16 */ + 0x80, /* 17 */ + 0, 0, 0, 0, 0, + 0x80, /* 1d */ + 0x80, /* 1e */ + 0, /* 1f */ + 0x80, /* 20 */ + 0x80, /* 21 */ + 0x80, /* 22 */ + 0x80, /* 23 */ + 0x80, /* 24 */ + 0x80, /* 25 */ + 0x80, /* 26 */ + 0x80, /* 27 */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int power5_marked_instr_event(unsigned int event) +{ + int pmc, psel; + int bit, byte, unit; + u32 mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc >= 5) + return 0; + + bit = -1; + if (psel < sizeof(direct_event_is_marked)) { + if (direct_event_is_marked[psel] & (1 << pmc)) + return 1; + if (direct_event_is_marked[psel] & 0x80) + bit = 4; + else if (psel == 0x08) + bit = pmc - 1; + else if (psel == 0x10) + bit = 4 - pmc; + else if (psel == 0x1b && (pmc == 1 || pmc == 3)) + bit = 4; + } else if ((psel & 0x58) == 0x40) + bit = psel & 7; + + if (!(event & PM_BUSEVENT_MSK)) + return 0; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + if (unit == PM_LSU0) { + /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ + mask = 0x5dff00; + } else if (unit == PM_LSU1 && byte >= 4) { + byte -= 4; + /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ + mask = 0x5f00c0aa; + } else + return 0; + + return (mask >> (byte * 8 + bit)) & 1; +} + static int power5_compute_mmcr(unsigned int event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; + u64 mmcra = 0; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; @@ -430,6 +522,8 @@ static int power5_compute_mmcr(unsigned int event[], int n_ev, grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; mmcr1 |= (u64)grsel << grsel_shift[bit]; } + if (power5_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; if (pmc <= 3) mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); hwc[i] = pmc; @@ -442,7 +536,7 @@ static int power5_compute_mmcr(unsigned int event[], int n_ev, if (pmc_inuse & 0x3e) mmcr[0] |= MMCR0_PMCjCE; mmcr[1] = mmcr1; - mmcr[2] = 0; + mmcr[2] = mmcra; return 0; } diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index b1f61f3..fce1fc2 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -49,12 +49,134 @@ #define MMCR1_PMCSEL_MSK 0xff /* + * Map of which direct events on which PMCs are marked instruction events. + * Indexed by PMCSEL value >> 1. + * Bottom 4 bits are a map of which PMCs are interesting, + * top 4 bits say what sort of event: + * 0 = direct marked event, + * 1 = byte decode event, + * 4 = add/and event (PMC1 -> bits 0 & 4), + * 5 = add/and event (PMC1 -> bits 1 & 5), + * 6 = add/and event (PMC1 -> bits 2 & 6), + * 7 = add/and event (PMC1 -> bits 3 & 7). + */ +static unsigned char direct_event_is_marked[0x60 >> 1] = { + 0, /* 00 */ + 0, /* 02 */ + 0, /* 04 */ + 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ + 0x04, /* 08 PM_MRK_DFU_FIN */ + 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */ + 0, /* 0c */ + 0, /* 0e */ + 0x02, /* 10 PM_MRK_INST_DISP */ + 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */ + 0, /* 14 */ + 0, /* 16 */ + 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */ + 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */ + 0x01, /* 1c PM_MRK_INST_ISSUED */ + 0, /* 1e */ + 0, /* 20 */ + 0, /* 22 */ + 0, /* 24 */ + 0, /* 26 */ + 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */ + 0, /* 2a */ + 0, /* 2c */ + 0, /* 2e */ + 0x4f, /* 30 */ + 0x7f, /* 32 */ + 0x4f, /* 34 */ + 0x5f, /* 36 */ + 0x6f, /* 38 */ + 0x4f, /* 3a */ + 0, /* 3c */ + 0x08, /* 3e PM_MRK_INST_TIMEO */ + 0x1f, /* 40 */ + 0x1f, /* 42 */ + 0x1f, /* 44 */ + 0x1f, /* 46 */ + 0x1f, /* 48 */ + 0x1f, /* 4a */ + 0x1f, /* 4c */ + 0x1f, /* 4e */ + 0, /* 50 */ + 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */ + 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */ + 0x02, /* 56 PM_MRK_LD_MISS_L1 */ + 0, /* 58 */ + 0, /* 5a */ + 0, /* 5c */ + 0, /* 5e */ +}; + +/* + * Masks showing for each unit which bits are marked events. + * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0. + */ +static u32 marked_bus_events[16] = { + 0x01000000, /* direct events set 1: byte 3 bit 0 */ + 0x00010000, /* direct events set 2: byte 2 bit 0 */ + 0, 0, 0, 0, /* IDU, IFU, nest: nothing */ + 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */ + 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */ + 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */ + 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */ + 0, /* LSU set 3 */ + 0x00000010, /* VMX set 3: byte 0 bit 4 */ + 0, /* BFP set 1 */ + 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */ + 0, 0 +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int power6_marked_instr_event(unsigned int event) +{ + int pmc, psel, ptype; + int bit, byte, unit; + u32 mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ + if (pmc >= 5) + return 0; + + bit = -1; + if (psel < sizeof(direct_event_is_marked)) { + ptype = direct_event_is_marked[psel]; + if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) + return 0; + ptype >>= 4; + if (ptype == 0) + return 1; + if (ptype == 1) + bit = 0; + else + bit = ptype ^ (pmc - 1); + } else if ((psel & 0x48) == 0x40) + bit = psel & 7; + + if (!(event & PM_BUSEVENT_MSK) || bit == -1) + return 0; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + mask = marked_bus_events[unit]; + return (mask >> (byte * 8 + bit)) & 1; +} + +/* * Assign PMC numbers and compute MMCR1 value for a set of events */ static int p6_compute_mmcr(unsigned int event[], int n_ev, unsigned int hwc[], u64 mmcr[]) { u64 mmcr1 = 0; + u64 mmcra = 0; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; @@ -116,6 +238,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, if (ev & PM_LLAV) mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; } + if (power6_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); } mmcr[0] = 0; @@ -124,7 +248,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, if (pmc_inuse & 0xe) mmcr[0] |= MMCR0_PMCjCE; mmcr[1] = mmcr1; - mmcr[2] = 0; + mmcr[2] = mmcra; return 0; } diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index c325658..aed8ccd 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -19,6 +19,8 @@ #define PM_PMC_MSK 0xf #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ #define PM_UNIT_MSK 0xf +#define PM_SPCSEL_SH 6 +#define PM_SPCSEL_MSK 3 #define PM_BYTE_SH 4 /* Byte number of event bus to use */ #define PM_BYTE_MSK 3 #define PM_PMCSEL_MSK 0xf @@ -88,8 +90,11 @@ static short mmcr1_adder_bits[8] = { * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 - * <><>[ >[ >[ >< >< >< >< ><><><><><><><><> - * T0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><> + * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 + * + * SP - SPCSEL constraint + * 48-49: SPCSEL value 0x3_0000_0000_0000 * * T0 - TTM0 constraint * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 @@ -126,6 +131,57 @@ static short mmcr1_adder_bits[8] = { * 0-13: Count of events needing PMC2..PMC8 */ +static unsigned char direct_marked_event[8] = { + (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ + (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ + (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */ + (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ + (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */ + (1<<3) | (1<<4) | (1<<5), + /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ + (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ + (1<<4) /* PMC8: PM_MRK_LSU_FIN */ +}; + +/* + * Returns 1 if event counts things relating to marked instructions + * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. + */ +static int p970_marked_instr_event(unsigned int event) +{ + int pmc, psel, unit, byte, bit; + unsigned int mask; + + pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + psel = event & PM_PMCSEL_MSK; + if (pmc) { + if (direct_marked_event[pmc - 1] & (1 << psel)) + return 1; + if (psel == 0) /* add events */ + bit = (pmc <= 4)? pmc - 1: 8 - pmc; + else if (psel == 7 || psel == 13) /* decode events */ + bit = 4; + else + return 0; + } else + bit = psel; + + byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; + unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; + mask = 0; + switch (unit) { + case PM_VPU: + mask = 0x4c; /* byte 0 bits 2,3,6 */ + case PM_LSU0: + /* byte 2 bits 0,2,3,4,6; all of byte 1 */ + mask = 0x085dff00; + case PM_LSU1L: + mask = 0x50 << 24; /* byte 3 bits 4,6 */ + break; + } + return (mask >> (byte * 8 + bit)) & 1; +} + /* Masks and values for using events from the various units */ static u64 unit_cons[PM_LASTUNIT+1][2] = { [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, @@ -138,7 +194,7 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) { - int pmc, byte, unit, sh; + int pmc, byte, unit, sh, spcsel; u64 mask = 0, value = 0; int grp = -1; @@ -177,6 +233,11 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) mask |= 0x800000000ull; value |= 0x100000000ull; } + spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; + if (spcsel) { + mask |= 3ull << 48; + value |= (u64)spcsel << 48; + } *maskp = mask; *valp = value; return 0; @@ -209,6 +270,7 @@ static int p970_compute_mmcr(unsigned int event[], int n_ev, unsigned char ttmuse[2]; unsigned char pmcsel[8]; int i; + int spcsel; if (n_ev > 8) return -1; @@ -316,6 +378,10 @@ static int p970_compute_mmcr(unsigned int event[], int n_ev, } pmcsel[pmc] = psel; hwc[i] = pmc; + spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; + mmcr1 |= spcsel; + if (p970_marked_instr_event(event[i])) + mmcra |= MMCRA_SAMPLE_ENABLE; } for (pmc = 0; pmc < 2; ++pmc) mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); -- cgit v0.10.2 From bab5bc9e857638880facef76e4b4c3fa807f8c73 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Tue, 7 Apr 2009 23:23:50 -0700 Subject: futex: fixup unlocked requeue pi case Thomas's testing caught a problem when the requeue target futex is unowned and multiple tasks are requeued to it. This patch ensures the FUTEX_WAITERS bit gets set if futex_requeue() will requeue one or more tasks in addition to the one acquiring the lock. Signed-off-by: Darren Hart Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 185c981..041bf3a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -565,12 +565,14 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, /** * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex - * @uaddr: the pi futex user address - * @hb: the pi futex hash bucket - * @key: the futex key associated with uaddr and hb - * @ps: the pi_state pointer where we store the result of the lookup - * @task: the task to perform the atomic lock work for. This will be - * "current" except in the case of requeue pi. + * @uaddr: the pi futex user address + * @hb: the pi futex hash bucket + * @key: the futex key associated with uaddr and hb + * @ps: the pi_state pointer where we store the result of the + * lookup + * @task: the task to perform the atomic lock work for. This will + * be "current" except in the case of requeue pi. + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Returns: * 0 - ready to wait @@ -582,7 +584,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, - struct task_struct *task) + struct task_struct *task, int set_waiters) { int lock_taken, ret, ownerdied = 0; u32 uval, newval, curval; @@ -596,6 +598,8 @@ retry: * the locks. It will most likely not succeed. */ newval = task_pid_vnr(task); + if (set_waiters) + newval |= FUTEX_WAITERS; curval = cmpxchg_futex_value_locked(uaddr, 0, newval); @@ -1004,14 +1008,18 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) /** * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter - * @pifutex: the user address of the to futex - * @hb1: the from futex hash bucket, must be locked by the caller - * @hb2: the to futex hash bucket, must be locked by the caller - * @key1: the from futex key - * @key2: the to futex key + * @pifutex: the user address of the to futex + * @hb1: the from futex hash bucket, must be locked by the caller + * @hb2: the to futex hash bucket, must be locked by the caller + * @key1: the from futex key + * @key2: the to futex key + * @ps: address to store the pi_state pointer + * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Try and get the lock on behalf of the top waiter if we can do it atomically. - * Wake the top waiter if we succeed. hb1 and hb2 must be held by the caller. + * Wake the top waiter if we succeed. If the caller specified set_waiters, + * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. + * hb1 and hb2 must be held by the caller. * * Returns: * 0 - failed to acquire the lock atomicly @@ -1022,15 +1030,23 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2, union futex_key *key1, union futex_key *key2, - struct futex_pi_state **ps) + struct futex_pi_state **ps, int set_waiters) { - struct futex_q *top_waiter; + struct futex_q *top_waiter = NULL; u32 curval; int ret; if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; + /* + * Find the top_waiter and determine if there are additional waiters. + * If the caller intends to requeue more than 1 waiter to pifutex, + * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, + * as we have means to handle the possible fault. If not, don't set + * the bit unecessarily as it will force the subsequent unlock to enter + * the kernel. + */ top_waiter = futex_top_waiter(hb1, key1); /* There are no waiters, nothing for us to do. */ @@ -1038,10 +1054,12 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, return 0; /* - * Either take the lock for top_waiter or set the FUTEX_WAITERS bit. - * The pi_state is returned in ps in contended cases. + * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in + * the contended case or if set_waiters is 1. The pi_state is returned + * in ps in contended cases. */ - ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task); + ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, + set_waiters); if (ret == 1) requeue_pi_wake_futex(top_waiter, key2); @@ -1146,9 +1164,14 @@ retry_private: } if (requeue_pi && (task_count - nr_wake < nr_requeue)) { - /* Attempt to acquire uaddr2 and wake the top_waiter. */ + /* + * Attempt to acquire uaddr2 and wake the top waiter. If we + * intend to requeue waiters, force setting the FUTEX_WAITERS + * bit. We force this here where we are able to easily handle + * faults rather in the requeue loop below. + */ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, - &key2, &pi_state); + &key2, &pi_state, nr_requeue); /* * At this point the top_waiter has either taken uaddr2 or is @@ -1810,7 +1833,7 @@ retry: retry_private: hb = queue_lock(&q); - ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current); + ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); if (unlikely(ret)) { switch (ret) { case 1: -- cgit v0.10.2 From fcb2ac5bdfa3a7a04fb9749b916f64400f4c35a8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 8 Apr 2009 13:31:58 +0200 Subject: x86_32: introduce restore_fpu_checking() Impact: cleanup, prepare FPU code unificaton Like on x86_64, return an error from restore_fpu and kill the task if it fails. Also rename restore_fpu to restore_fpu_checking which allows ifdefs to be removed in math_state_restore(). Signed-off-by: Jiri Slaby LKML-Reference: <1239190320-23952-1-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 71c9e51..09a2d6d 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -185,12 +185,10 @@ static inline void tolerant_fwait(void) asm volatile("fnclex ; fwait"); } -static inline void restore_fpu(struct task_struct *tsk) +static inline int restore_fpu_checking(struct task_struct *tsk) { - if (task_thread_info(tsk)->status & TS_XSAVE) { - xrstor_checking(&tsk->thread.xstate->xsave); - return; - } + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); /* * The "nop" is needed to make the instructions the same * length. @@ -200,6 +198,7 @@ static inline void restore_fpu(struct task_struct *tsk) "fxrstor %1", X86_FEATURE_FXSR, "m" (tsk->thread.xstate->fxsave)); + return 0; } /* We need a safe address that is cheap to find and that is already diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d2883..d696145 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -839,9 +839,6 @@ asmlinkage void math_state_restore(void) } clts(); /* Allow maths ops (or we recurse) */ -#ifdef CONFIG_X86_32 - restore_fpu(tsk); -#else /* * Paranoid restore. send a SIGSEGV if we fail to restore the state. */ @@ -850,7 +847,7 @@ asmlinkage void math_state_restore(void) force_sig(SIGSEGV, tsk); return; } -#endif + thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; } -- cgit v0.10.2 From 34ba476a01e128aad51e02f9be854584e9ec73cf Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 8 Apr 2009 13:31:59 +0200 Subject: x86: unify restore_fpu_checking Impact: cleanup On x86_32, separate f*rstor to an inline function which makes restore_fpu_checking the same on both platforms -> move it outside the ifdefs. Signed-off-by: Jiri Slaby LKML-Reference: <1239190320-23952-2-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 09a2d6d..7a6f21d 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return err; } -static inline int restore_fpu_checking(struct task_struct *tsk) -{ - if (task_thread_info(tsk)->status & TS_XSAVE) - return xrstor_checking(&tsk->thread.xstate->xsave); - else - return fxrstor_checking(&tsk->thread.xstate->fxsave); -} - /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. The kernel data segment can be sometimes 0 and sometimes @@ -185,10 +177,9 @@ static inline void tolerant_fwait(void) asm volatile("fnclex ; fwait"); } -static inline int restore_fpu_checking(struct task_struct *tsk) +/* perform fxrstor iff the processor has extended states, otherwise frstor */ +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { - if (task_thread_info(tsk)->status & TS_XSAVE) - return xrstor_checking(&tsk->thread.xstate->xsave); /* * The "nop" is needed to make the instructions the same * length. @@ -197,7 +188,8 @@ static inline int restore_fpu_checking(struct task_struct *tsk) "nop ; frstor %1", "fxrstor %1", X86_FEATURE_FXSR, - "m" (tsk->thread.xstate->fxsave)); + "m" (*fx)); + return 0; } @@ -261,6 +253,14 @@ end: #endif /* CONFIG_X86_64 */ +static inline int restore_fpu_checking(struct task_struct *tsk) +{ + if (task_thread_info(tsk)->status & TS_XSAVE) + return xrstor_checking(&tsk->thread.xstate->xsave); + else + return fxrstor_checking(&tsk->thread.xstate->fxsave); +} + /* * Signal frame handlers... */ -- cgit v0.10.2 From 4ecf458492c2d97b3f9d850a5f92d79792e0a7e7 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 8 Apr 2009 13:32:00 +0200 Subject: x86_64: fix incorrect comments Impact: cleanup The comments which fxrstor_checking and fxsave_uset refer to is now in fxsave. Change the comments appropriately. Signed-off-by: Jiri Slaby Cc: Jiri Slaby LKML-Reference: <1239190320-23952-3-git-send-email-jirislaby@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 7a6f21d..63d1850 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err) -#if 0 /* See comment in __save_init_fpu() below. */ +#if 0 /* See comment in fxsave() below. */ : [fx] "r" (fx), "m" (*fx), "0" (0)); #else : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); @@ -112,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err), "=m" (*fx) -#if 0 /* See comment in __fxsave_clear() below. */ +#if 0 /* See comment in fxsave() below. */ : [fx] "r" (fx), "0" (0)); #else : [fx] "cdaSDb" (fx), "0" (0)); -- cgit v0.10.2 From a59dacfdc9ba06903652fa4883bf1106278b18ec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Oct 2008 14:38:08 +0200 Subject: x86 early quirks: eliminate unused function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup this warning: arch/x86/kernel/early-quirks.c:99: warning: ‘ati_ixp4x0_rev’ defined but not used triggers because ati_ixp4x0_rev() is only used in the ACPI && X86_IO_APIC case. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 76b8cd9..ebdb85c 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -97,6 +97,7 @@ static void __init nvidia_bugs(int num, int slot, int func) } #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) +#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) static u32 __init ati_ixp4x0_rev(int num, int slot, int func) { u32 d; @@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func) d &= 0xff; return d; } +#endif static void __init ati_bugs(int num, int slot, int func) { -- cgit v0.10.2 From cdc1cb0d4445f39561a65204d26f89365f917550 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 3 Apr 2009 17:15:14 -0700 Subject: x86: make wakeup_secondary_cpu_via_init static Impact: cleanup Signed-off-by: Yinghai Lu LKML-Reference: <49D6A692.6040400@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58d24ef..bddf2cc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) return (send_status | accept_status); } -int __devinit +static int __devinit wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; -- cgit v0.10.2 From 02421f98ec55c3ff118f358740ff640f096c7ad6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 3 Apr 2009 17:15:53 -0700 Subject: x86: consistent about warm_reset_vector for UN_NON_UNIQUE_APIC Impact: cleanup didn't set it for UV_NON_UNIQUE_APIC, so don't restore it Signed-off-by: Yinghai Lu LKML-Reference: <49D6A6B9.6060501@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bddf2cc..bf8ad63 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -822,10 +822,12 @@ do_rest: /* mark "stuck" area as not stuck */ *((volatile unsigned long *)trampoline_base) = 0; - /* - * Cleanup possible dangling ends... - */ - smpboot_restore_warm_reset_vector(); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { + /* + * Cleanup possible dangling ends... + */ + smpboot_restore_warm_reset_vector(); + } return boot_error; } -- cgit v0.10.2 From ceb5ac3264686e75e6951de6a18d4baa9bdecb92 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:15 -0500 Subject: swiotlb: comment corrections Impact: cleanup swiotlb_map/unmap_single are now swiotlb_map/unmap_page; trivially change all the comments to reference new names. Also, there were some comments that should have been referring to just plain old map_single, not swiotlb_map_single; fix those as well. Also change a use of the word "pointer", when what is referred to is actually a dma/physical address. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-2-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 2b0b5a7..170cf56 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -60,8 +60,8 @@ enum dma_sync_target { int swiotlb_force; /* - * Used to do a quick range check in swiotlb_unmap_single and - * swiotlb_sync_single_*, to see if the memory was in fact allocated by this + * Used to do a quick range check in unmap_single and + * sync_single_*, to see if the memory was in fact allocated by this * API. */ static char *io_tlb_start, *io_tlb_end; @@ -560,7 +560,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, size)) { /* * The allocated memory isn't reachable by the device. - * Fall back on swiotlb_map_single(). */ free_pages((unsigned long) ret, order); ret = NULL; @@ -568,9 +567,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, if (!ret) { /* * We are either out of memory or the device can't DMA - * to GFP_DMA memory; fall back on - * swiotlb_map_single(), which will grab memory from - * the lowest available address range. + * to GFP_DMA memory; fall back on map_single(), which + * will grab memory from the lowest available address range. */ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); if (!ret) @@ -634,7 +632,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) * physical address to use is returned. * * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. + * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. */ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, @@ -648,7 +646,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, BUG_ON(dir == DMA_NONE); /* - * If the pointer passed in happens to be in the device's DMA window, + * If the address happens to be in the device's DMA window, * we can safely return the device addr and not worry about bounce * buffering it. */ @@ -679,7 +677,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); /* * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_single call. All + * match what was provided for in a previous swiotlb_map_page call. All * other usages are undefined. * * After this call, reads by the cpu to the buffer are guaranteed to see @@ -703,7 +701,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); * Make physical memory consistent for a single streaming mode DMA translation * after a transfer. * - * If you perform a swiotlb_map_single() but wish to interrogate the buffer + * If you perform a swiotlb_map_page() but wish to interrogate the buffer * using the cpu, yet do not wish to teardown the dma mapping, you must * call this function before doing so. At the next point you give the dma * address back to the card, you must first perform a @@ -777,7 +775,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); /* * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_single + * This is the scatter-gather version of the above swiotlb_map_page * interface. Here the scatter gather list elements are each tagged with the * appropriate dma address and length. They are obtained via * sg_dma_{address,length}(SG). @@ -788,7 +786,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); * The routine returns the number of addr/length pairs actually * used, at most nents. * - * Device ownership issues as mentioned above for swiotlb_map_single are the + * Device ownership issues as mentioned above for swiotlb_map_page are the * same here. */ int @@ -836,7 +834,7 @@ EXPORT_SYMBOL(swiotlb_map_sg); /* * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_single() above. + * concerning calls here are the same as for swiotlb_unmap_page() above. */ void swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, -- cgit v0.10.2 From 67131ad0514d7105b55003a0506209cf1bba3f00 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:16 -0500 Subject: swiotlb: fix compile warning Squash a build warning seen on 32-bit powerpc caused by calling min() with 2 different types. Use min_t() instead. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-3-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 170cf56..4fd6a76 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -341,7 +341,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, unsigned long flags; while (size) { - sz = min(PAGE_SIZE - offset, size); + sz = min_t(size_t, PAGE_SIZE - offset, size); local_irq_save(flags); buffer = kmap_atomic(pfn_to_page(pfn), -- cgit v0.10.2 From dd6b02fe427f30520d0adc94aa52352367227873 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:17 -0500 Subject: swiotlb: map_page fix for highmem systems The current code calls virt_to_phys() on address that might be in highmem, which is bad. This wasn't needed, anyway, because we already have the physical address we need. Get rid of the now-unused virtual address as well. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-4-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 4fd6a76..e8a47c8 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -640,7 +640,6 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { phys_addr_t phys = page_to_phys(page) + offset; - void *ptr = page_address(page) + offset; dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); void *map; @@ -651,7 +650,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * buffering it. */ if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(virt_to_phys(ptr), size)) + !range_needs_mapping(phys, size)) return dev_addr; /* -- cgit v0.10.2 From ef5722f698bde01cfec2b98fff733a48663ebf55 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:18 -0500 Subject: swiotlb: allow arch override of address_needs_mapping Some architectures require additional checking to determine if a device can dma to an address and need to provide their own address_needs_mapping.. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-5-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e8a47c8..d81afab 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -145,6 +145,12 @@ static void *swiotlb_bus_to_virt(dma_addr_t address) return phys_to_virt(swiotlb_bus_to_phys(address)); } +int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, + dma_addr_t addr, size_t size) +{ + return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); +} + int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) { return 0; @@ -309,10 +315,10 @@ cleanup1: return -ENOMEM; } -static int +static inline int address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) { - return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); + return swiotlb_arch_address_needs_mapping(hwdev, addr, size); } static inline int range_needs_mapping(phys_addr_t paddr, size_t size) -- cgit v0.10.2 From 7fcebbd2d984eac3fdd6da2f4453e7c43d32de89 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:19 -0500 Subject: swiotlb: rename unmap_single to do_unmap_single Previously, swiotlb_unmap_page and swiotlb_unmap_sg were duplicating very similar code. Refactor that code into a new unmap_single and unmap_single use do_unmap_single. Note that the swiotlb_unmap_sg code was previously doing a complicated comparison to determine if an addresses needed to be unmapped where a simple is_swiotlb_buffer() call would have sufficed. Signed-off-by: Becky Bruce Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-6-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d81afab..2bde54a 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -482,7 +482,7 @@ found: * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ static void -unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -591,7 +591,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, (unsigned long long)dev_addr); /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - unmap_single(hwdev, ret, size, DMA_TO_DEVICE); + do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); return NULL; } *dma_handle = dev_addr; @@ -608,7 +608,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, free_pages((unsigned long) vaddr, get_order(size)); else /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ - unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); + do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); } EXPORT_SYMBOL(swiotlb_free_coherent); @@ -688,17 +688,29 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. */ -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) +static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir) { char *dma_addr = swiotlb_bus_to_virt(dev_addr); BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) - unmap_single(hwdev, dma_addr, size, dir); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(dma_addr, size); + + if (is_swiotlb_buffer(dma_addr)) { + do_unmap_single(hwdev, dma_addr, size, dir); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + dma_mark_clean(dma_addr, size); +} + +void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unmap_single(hwdev, dev_addr, size, dir); } EXPORT_SYMBOL_GPL(swiotlb_unmap_page); @@ -850,13 +862,9 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); - for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) - unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), - sg->dma_length, dir); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); - } + for_each_sg(sgl, sg, nelems, i) + unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); -- cgit v0.10.2 From 380d687833aee098c4a2c3b35beaefe1c1f48d01 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:20 -0500 Subject: swiotlb: use swiotlb_sync_single instead of duplicating code Right now both swiotlb_sync_single_range and swiotlb_sync_sg were duplicating the code in swiotlb_sync_single. Just call it instead. Also rearrange the sync_single code for readability. Note that the swiotlb_sync_sg code was previously doing a complicated comparison to determine if an addresses needed to be unmapped where a simple is_swiotlb_buffer() call would have sufficed. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-7-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 2bde54a..d912f06 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -731,10 +731,16 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, char *dma_addr = swiotlb_bus_to_virt(dev_addr); BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) + + if (is_swiotlb_buffer(dma_addr)) { sync_single(hwdev, dma_addr, size, dir, target); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(dma_addr, size); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + dma_mark_clean(dma_addr, size); } void @@ -761,13 +767,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, unsigned long offset, size_t size, int dir, int target) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset; - - BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(dma_addr)) - sync_single(hwdev, dma_addr, size, dir, target); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(dma_addr, size); + swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target); } void @@ -890,15 +890,9 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, struct scatterlist *sg; int i; - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) { - if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) - sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), + for_each_sg(sgl, sg, nelems, i) + swiotlb_sync_single(hwdev, sg->dma_address, sg->dma_length, dir, target); - else if (dir == DMA_FROM_DEVICE) - dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); - } } void -- cgit v0.10.2 From 42d7c5e353cef9062129b0de3ec9ddf10567b9ca Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Wed, 8 Apr 2009 09:09:21 -0500 Subject: swiotlb: change swiotlb_bus_to[phys,virt] prototypes Add a hwdev argument that is needed on some architectures in order to access a per-device offset that is taken into account when producing a physical address (also needed to get from bus address to virtual address because the physical address is an intermediate step). Also make swiotlb_bus_to_virt weak so architectures can override it. Signed-off-by: Becky Bruce Acked-by: FUJITA Tomonori Signed-off-by: Kumar Gala Cc: jeremy@goop.org Cc: ian.campbell@citrix.com LKML-Reference: <1239199761-22886-8-git-send-email-galak@kernel.crashing.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 34f12e9..887388a 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) return paddr; } -phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) +phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) { return baddr; } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index ac9ff54..cb1a663 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, + dma_addr_t address); extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d912f06..bffe6d7 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) return paddr; } -phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr) +phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) { return baddr; } @@ -140,9 +140,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); } -static void *swiotlb_bus_to_virt(dma_addr_t address) +void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address) { - return phys_to_virt(swiotlb_bus_to_phys(address)); + return phys_to_virt(swiotlb_bus_to_phys(hwdev, address)); } int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, @@ -691,7 +691,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr); + char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); @@ -728,7 +728,7 @@ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - char *dma_addr = swiotlb_bus_to_virt(dev_addr); + char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); -- cgit v0.10.2 From a4e94ef0dd391eae05bdeacd12b8da3510957a97 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 27 Mar 2009 17:07:05 +0800 Subject: printk: add support of hh length modifier for printk Impact: new feature, extend vsprintf format strings hh is used as length modifier for signed char or unsigned char. It is supported by glibc, we add kernel support now. Signed-off-by: Zhao Lei Acked-by: Lai Jiangshan Acked-by: Frederic Weisbecker Cc: torvalds@linux-foundation.org Cc: Steven Rostedt LKML-Reference: <49CC9739.30107@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7536ace..b56f6d0 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -408,6 +408,8 @@ enum format_type { FORMAT_TYPE_LONG_LONG, FORMAT_TYPE_ULONG, FORMAT_TYPE_LONG, + FORMAT_TYPE_UBYTE, + FORMAT_TYPE_BYTE, FORMAT_TYPE_USHORT, FORMAT_TYPE_SHORT, FORMAT_TYPE_UINT, @@ -853,11 +855,15 @@ qualifier: spec->qualifier = -1; if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { - spec->qualifier = *fmt; - ++fmt; - if (spec->qualifier == 'l' && *fmt == 'l') { - spec->qualifier = 'L'; - ++fmt; + spec->qualifier = *fmt++; + if (unlikely(spec->qualifier == *fmt)) { + if (spec->qualifier == 'l') { + spec->qualifier = 'L'; + ++fmt; + } else if (spec->qualifier == 'h') { + spec->qualifier = 'H'; + ++fmt; + } } } @@ -919,6 +925,11 @@ qualifier: spec->type = FORMAT_TYPE_SIZE_T; } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; + } else if (spec->qualifier == 'H') { + if (spec->flags & SIGN) + spec->type = FORMAT_TYPE_BYTE; + else + spec->type = FORMAT_TYPE_UBYTE; } else if (spec->qualifier == 'h') { if (spec->flags & SIGN) spec->type = FORMAT_TYPE_SHORT; @@ -1087,6 +1098,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_PTRDIFF: num = va_arg(args, ptrdiff_t); break; + case FORMAT_TYPE_UBYTE: + num = (unsigned char) va_arg(args, int); + break; + case FORMAT_TYPE_BYTE: + num = (signed char) va_arg(args, int); + break; case FORMAT_TYPE_USHORT: num = (unsigned short) va_arg(args, int); break; @@ -1363,6 +1380,10 @@ do { \ case FORMAT_TYPE_PTRDIFF: save_arg(ptrdiff_t); break; + case FORMAT_TYPE_UBYTE: + case FORMAT_TYPE_BYTE: + save_arg(char); + break; case FORMAT_TYPE_USHORT: case FORMAT_TYPE_SHORT: save_arg(short); @@ -1538,6 +1559,12 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) case FORMAT_TYPE_PTRDIFF: num = get_arg(ptrdiff_t); break; + case FORMAT_TYPE_UBYTE: + num = get_arg(unsigned char); + break; + case FORMAT_TYPE_BYTE: + num = get_arg(signed char); + break; case FORMAT_TYPE_USHORT: num = get_arg(unsigned short); break; -- cgit v0.10.2 From 7333a8003cdc0470e8c0ae8b949cbc44f3165ff3 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 25 Mar 2009 10:50:34 +0900 Subject: x86: smarten /proc/interrupts output for new counters Now /proc/interrupts of tip tree has new counters: CNT: Performance counter interrupts Format change of output, as like that by commit: commit 7a81d9a7da03d2f27840d659f97ef140d032f609 x86: smarten /proc/interrupts output should be applied to these new counters too. Signed-off-by: Hidetoshi Seto Cc: Jan Beulich LKML-Reference: <49C98DEA.8060208@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d465487..dccaaa8 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -63,7 +63,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "CNT: "); + seq_printf(p, "%*s: ", prec, "CNT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, " Performance counter interrupts\n"); -- cgit v0.10.2 From ae9e6bc9f74f8247cbca50a6a93c80e0d686fa19 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 2 Apr 2009 13:19:54 +0900 Subject: percpu: don't put the first chunk in reverse-map rbtree Impact: both first chunks don't use rbtree, no functional change There can be two first chunks - reserved and dynamic with the former one being optional. Dynamic first chunk was linked on reverse-mapping rbtree while the reserved one was mapped manually using the start address and reserved offset limit. This patch makes both first chunks to be looked up manually without using the rbtree. This is to help getting rid of the rbtree. Signed-off-by: Tejun Heo Cc: Martin Schwidefsky Cc: rusty@rustcorp.com.au Cc: Paul Mundt Cc: rmk@arm.linux.org.uk Cc: starvik@axis.com Cc: ralf@linux-mips.org Cc: davem@davemloft.net Cc: cooloney@kernel.org Cc: kyle@mcmartin.ca Cc: matthew@wil.cx Cc: grundler@parisc-linux.org Cc: takata@linux-m32r.org Cc: benh@kernel.crashing.org Cc: rth@twiddle.net Cc: ink@jurassic.park.msu.ru Cc: heiko.carstens@de.ibm.com Cc: Linus Torvalds Cc: Nick Piggin Cc: Christoph Lameter LKML-Reference: <49D43CEA.3040609@kernel.org> Signed-off-by: Ingo Molnar diff --git a/mm/percpu.c b/mm/percpu.c index 1aa5d8f..bf1bf1f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -110,9 +110,21 @@ static size_t pcpu_chunk_struct_size __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* optional reserved chunk, only accessible for reserved allocations */ +/* + * The first chunk which always exists. Note that unlike other + * chunks, this one can be allocated and mapped in several different + * ways and thus often doesn't live in the vmalloc area. + */ +static struct pcpu_chunk *pcpu_first_chunk; + +/* + * Optional reserved chunk. This chunk reserves part of the first + * chunk and serves it for reserved allocations. The amount of + * reserved offset is in pcpu_reserved_chunk_limit. When reserved + * area doesn't exist, the following variables contain NULL and 0 + * respectively. + */ static struct pcpu_chunk *pcpu_reserved_chunk; -/* offset limit of the reserved chunk */ static int pcpu_reserved_chunk_limit; /* @@ -297,15 +309,16 @@ static struct rb_node **pcpu_chunk_rb_search(void *addr, */ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) { + void *first_start = pcpu_first_chunk->vm->addr; struct rb_node *n, *parent; struct pcpu_chunk *chunk; - /* is it in the reserved chunk? */ - if (pcpu_reserved_chunk) { - void *start = pcpu_reserved_chunk->vm->addr; - - if (addr >= start && addr < start + pcpu_reserved_chunk_limit) + /* is it in the first chunk? */ + if (addr >= first_start && addr < first_start + pcpu_chunk_size) { + /* is it in the reserved area? */ + if (addr < first_start + pcpu_reserved_chunk_limit) return pcpu_reserved_chunk; + return pcpu_first_chunk; } /* nah... search the regular ones */ @@ -1147,7 +1160,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (reserved_size) { schunk->free_size = reserved_size; - pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ + pcpu_reserved_chunk = schunk; + pcpu_reserved_chunk_limit = static_size + reserved_size; } else { schunk->free_size = dyn_size; dyn_size = 0; /* dynamic area covered */ @@ -1158,8 +1172,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; - pcpu_reserved_chunk_limit = static_size + schunk->free_size; - /* init dynamic chunk if necessary */ if (dyn_size) { dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); @@ -1226,13 +1238,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, } /* link the first chunk in */ - if (!dchunk) { - pcpu_chunk_relocate(schunk, -1); - pcpu_chunk_addr_insert(schunk); - } else { - pcpu_chunk_relocate(dchunk, -1); - pcpu_chunk_addr_insert(dchunk); - } + pcpu_first_chunk = dchunk ?: schunk; + pcpu_chunk_relocate(pcpu_first_chunk, -1); /* we're done */ pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); -- cgit v0.10.2 From e1b9aa3f47242e757c776a3771bb6613e675bf9c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 2 Apr 2009 13:21:44 +0900 Subject: percpu: remove rbtree and use page->index instead Impact: use page->index for addr to chunk mapping instead of dedicated rbtree The rbtree is used to determine the chunk from the virtual address. However, we can already determine the page struct from a virtual address and there are several unused fields in page struct used by vmalloc. Use the index field to store a pointer to the chunk. Then there is no need anymore for an rbtree. tj: * s/(set|get)_chunk/pcpu_\1_page_chunk/ * Drop inline from the above two functions and moved them upwards so that they are with other simple helpers. * Initial pages might not (actually most of the time don't) live in the vmalloc area. With the previous patch to manually reverse-map both first chunks, this is no longer an issue. Removed pcpu_set_chunk() call on initial pages. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo Cc: Martin Schwidefsky Cc: rusty@rustcorp.com.au Cc: Paul Mundt Cc: rmk@arm.linux.org.uk Cc: starvik@axis.com Cc: ralf@linux-mips.org Cc: davem@davemloft.net Cc: cooloney@kernel.org Cc: kyle@mcmartin.ca Cc: matthew@wil.cx Cc: grundler@parisc-linux.org Cc: takata@linux-m32r.org Cc: benh@kernel.crashing.org Cc: rth@twiddle.net Cc: ink@jurassic.park.msu.ru Cc: heiko.carstens@de.ibm.com Cc: Linus Torvalds Cc: Nick Piggin LKML-Reference: <49D43D58.4050102@kernel.org> Signed-off-by: Ingo Molnar diff --git a/mm/percpu.c b/mm/percpu.c index bf1bf1f..c0b2c1a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -23,7 +23,7 @@ * Allocation is done in offset-size areas of single unit space. Ie, * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring - * percpu base registers UNIT_SIZE apart. + * percpu base registers pcpu_unit_size apart. * * There are usually many small percpu allocations many of them as * small as 4 bytes. The allocator organizes chunks into lists @@ -38,8 +38,8 @@ * region and negative allocated. Allocation inside a chunk is done * by scanning this map sequentially and serving the first matching * entry. This is mostly copied from the percpu_modalloc() allocator. - * Chunks are also linked into a rb tree to ease address to chunk - * mapping during free. + * Chunks can be determined from the address using the index field + * in the page struct. The index field contains a pointer to the chunk. * * To use this allocator, arch code should do the followings. * @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include @@ -88,7 +87,6 @@ struct pcpu_chunk { struct list_head list; /* linked to pcpu_slot lists */ - struct rb_node rb_node; /* key is chunk->vm->addr */ int free_size; /* free bytes in the chunk */ int contig_hint; /* max contiguous size hint */ struct vm_struct *vm; /* mapped vmalloc region */ @@ -133,7 +131,7 @@ static int pcpu_reserved_chunk_limit; * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former * protects allocation/reclaim paths, chunks and chunk->page arrays. * The latter is a spinlock and protects the index data structures - - * chunk slots, rbtree, chunks and area maps in chunks. + * chunk slots, chunks and area maps in chunks. * * During allocation, pcpu_alloc_mutex is kept locked all the time and * pcpu_lock is grabbed and released as necessary. All actual memory @@ -152,7 +150,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ -static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ /* reclaim work to release fully free chunks, scheduled from free path */ static void pcpu_reclaim(struct work_struct *work); @@ -203,6 +200,18 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; } +/* set the pointer to a chunk in a page struct */ +static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu) +{ + page->index = (unsigned long)pcpu; +} + +/* obtain pointer to a chunk from a page struct */ +static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) +{ + return (struct pcpu_chunk *)page->index; +} + /** * pcpu_mem_alloc - allocate memory * @size: bytes to allocate @@ -269,40 +278,9 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) } } -static struct rb_node **pcpu_chunk_rb_search(void *addr, - struct rb_node **parentp) -{ - struct rb_node **p = &pcpu_addr_root.rb_node; - struct rb_node *parent = NULL; - struct pcpu_chunk *chunk; - - while (*p) { - parent = *p; - chunk = rb_entry(parent, struct pcpu_chunk, rb_node); - - if (addr < chunk->vm->addr) - p = &(*p)->rb_left; - else if (addr > chunk->vm->addr) - p = &(*p)->rb_right; - else - break; - } - - if (parentp) - *parentp = parent; - return p; -} - /** - * pcpu_chunk_addr_search - search for chunk containing specified address - * @addr: address to search for - * - * Look for chunk which might contain @addr. More specifically, it - * searchs for the chunk with the highest start address which isn't - * beyond @addr. - * - * CONTEXT: - * pcpu_lock. + * pcpu_chunk_addr_search - determine chunk containing specified address + * @addr: address for which the chunk needs to be determined. * * RETURNS: * The address of the found chunk. @@ -310,8 +288,6 @@ static struct rb_node **pcpu_chunk_rb_search(void *addr, static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) { void *first_start = pcpu_first_chunk->vm->addr; - struct rb_node *n, *parent; - struct pcpu_chunk *chunk; /* is it in the first chunk? */ if (addr >= first_start && addr < first_start + pcpu_chunk_size) { @@ -321,42 +297,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_first_chunk; } - /* nah... search the regular ones */ - n = *pcpu_chunk_rb_search(addr, &parent); - if (!n) { - /* no exactly matching chunk, the parent is the closest */ - n = parent; - BUG_ON(!n); - } - chunk = rb_entry(n, struct pcpu_chunk, rb_node); - - if (addr < chunk->vm->addr) { - /* the parent was the next one, look for the previous one */ - n = rb_prev(n); - BUG_ON(!n); - chunk = rb_entry(n, struct pcpu_chunk, rb_node); - } - - return chunk; -} - -/** - * pcpu_chunk_addr_insert - insert chunk into address rb tree - * @new: chunk to insert - * - * Insert @new into address rb tree. - * - * CONTEXT: - * pcpu_lock. - */ -static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) -{ - struct rb_node **p, *parent; - - p = pcpu_chunk_rb_search(new->vm->addr, &parent); - BUG_ON(*p); - rb_link_node(&new->rb_node, parent, p); - rb_insert_color(&new->rb_node, &pcpu_addr_root); + return pcpu_get_page_chunk(vmalloc_to_page(addr)); } /** @@ -768,6 +709,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) alloc_mask, 0); if (!*pagep) goto err; + pcpu_set_page_chunk(*pagep, chunk); } } @@ -892,7 +834,6 @@ restart: spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); - pcpu_chunk_addr_insert(chunk); goto restart; area_found: @@ -981,7 +922,6 @@ static void pcpu_reclaim(struct work_struct *work) if (chunk == list_first_entry(head, struct pcpu_chunk, list)) continue; - rb_erase(&chunk->rb_node, &pcpu_addr_root); list_move(&chunk->list, &todo); } -- cgit v0.10.2 From e30e08f65c7ef6c230424264f09c3d53f117f58b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:25 +0200 Subject: perf_counter: fix NMI race in task clock We should not be updating ctx->time from NMI context, work around that. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.681326666@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 863703b..84a3908 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -319,8 +319,6 @@ static void __perf_counter_disable(void *info) spin_lock_irqsave(&ctx->lock, flags); - update_context_time(ctx); - /* * If the counter is on, turn it off. * If it is in error state, leave it in error state. @@ -2335,13 +2333,11 @@ static const struct hw_perf_counter_ops perf_ops_cpu_clock = { * Software counter: task time clock */ -static void task_clock_perf_counter_update(struct perf_counter *counter) +static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) { - u64 prev, now; + u64 prev; s64 delta; - now = counter->ctx->time; - prev = atomic64_xchg(&counter->hw.prev_count, now); delta = now - prev; atomic64_add(delta, &counter->count); @@ -2369,13 +2365,24 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter) static void task_clock_perf_counter_disable(struct perf_counter *counter) { hrtimer_cancel(&counter->hw.hrtimer); - task_clock_perf_counter_update(counter); + task_clock_perf_counter_update(counter, counter->ctx->time); + } static void task_clock_perf_counter_read(struct perf_counter *counter) { - update_context_time(counter->ctx); - task_clock_perf_counter_update(counter); + u64 time; + + if (!in_nmi()) { + update_context_time(counter->ctx); + time = counter->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - counter->ctx->timestamp; + time = counter->ctx->time + delta; + } + + task_clock_perf_counter_update(counter, time); } static const struct hw_perf_counter_ops perf_ops_task_clock = { -- cgit v0.10.2 From 6fab01927e8bdbbc77bafba2abb4810c5591ad52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:26 +0200 Subject: perf_counter: provide misc bits in the event header Limit the size of each record to 64k (or should we count in multiples of u64 and have a 512K limit?), this gives 16 bits or spare room in the header, which we can use for misc bits, so as to not have to grow the record with u64 every time we have a few bits to report. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.769271806@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7f5d353..5bd8817 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -201,9 +201,13 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (1 << 1) + struct perf_event_header { __u32 type; - __u32 size; + __u16 misc; + __u16 size; }; enum perf_event_type { diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 84a3908..4af98f9 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1831,6 +1831,9 @@ static void perf_counter_output(struct perf_counter *counter, header.type = PERF_EVENT_COUNTER_OVERFLOW; header.size = sizeof(header); + header.misc = user_mode(regs) ? + PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; + if (record_type & PERF_RECORD_IP) { ip = instruction_pointer(regs); header.type |= __PERF_EVENT_IP; -- cgit v0.10.2 From 6b6e5486b3a168f0328c82a8d4376caf901472b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:27 +0200 Subject: perf_counter: use misc field to widen type Push the PERF_EVENT_COUNTER_OVERFLOW bit into the misc field so that we can have the full 32bit for PERF_RECORD_ bits. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.891867663@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5bd8817..4809ae1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -201,8 +201,9 @@ struct perf_counter_mmap_page { __u32 data_head; /* head in the data section */ }; -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_KERNEL (1 << 0) +#define PERF_EVENT_MISC_USER (1 << 1) +#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -230,36 +231,27 @@ enum perf_event_type { PERF_EVENT_MUNMAP = 2, /* - * Half the event type space is reserved for the counter overflow - * bitfields, as found in hw_event.record_type. - * - * These events will have types of the form: - * PERF_EVENT_COUNTER_OVERFLOW { | __PERF_EVENT_* } * + * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field + * will be PERF_RECORD_* * * struct { * struct perf_event_header header; * - * { u64 ip; } && __PERF_EVENT_IP - * { u32 pid, tid; } && __PERF_EVENT_TID + * { u64 ip; } && PERF_RECORD_IP + * { u32 pid, tid; } && PERF_RECORD_TID * * { u64 nr; - * { u64 event, val; } cnt[nr]; } && __PERF_EVENT_GROUP + * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP * * { u16 nr, * hv, * kernel, * user; - * u64 ips[nr]; } && __PERF_EVENT_CALLCHAIN + * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN * - * { u64 time; } && __PERF_EVENT_TIME + * { u64 time; } && PERF_RECORD_TIME * }; */ - PERF_EVENT_COUNTER_OVERFLOW = 1UL << 31, - __PERF_EVENT_IP = PERF_RECORD_IP, - __PERF_EVENT_TID = PERF_RECORD_TID, - __PERF_EVENT_GROUP = PERF_RECORD_GROUP, - __PERF_EVENT_CALLCHAIN = PERF_RECORD_CALLCHAIN, - __PERF_EVENT_TIME = PERF_RECORD_TIME, }; #ifdef __KERNEL__ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4af98f9..bf12df6 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1828,15 +1828,16 @@ static void perf_counter_output(struct perf_counter *counter, int callchain_size = 0; u64 time; - header.type = PERF_EVENT_COUNTER_OVERFLOW; + header.type = 0; header.size = sizeof(header); - header.misc = user_mode(regs) ? + header.misc = PERF_EVENT_MISC_OVERFLOW; + header.misc |= user_mode(regs) ? PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL; if (record_type & PERF_RECORD_IP) { ip = instruction_pointer(regs); - header.type |= __PERF_EVENT_IP; + header.type |= PERF_RECORD_IP; header.size += sizeof(ip); } @@ -1845,12 +1846,12 @@ static void perf_counter_output(struct perf_counter *counter, tid_entry.pid = current->group_leader->pid; tid_entry.tid = current->pid; - header.type |= __PERF_EVENT_TID; + header.type |= PERF_RECORD_TID; header.size += sizeof(tid_entry); } if (record_type & PERF_RECORD_GROUP) { - header.type |= __PERF_EVENT_GROUP; + header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + counter->nr_siblings * sizeof(group_entry); } @@ -1861,7 +1862,7 @@ static void perf_counter_output(struct perf_counter *counter, if (callchain) { callchain_size = (1 + callchain->nr) * sizeof(u64); - header.type |= __PERF_EVENT_CALLCHAIN; + header.type |= PERF_RECORD_CALLCHAIN; header.size += callchain_size; } } @@ -1872,7 +1873,7 @@ static void perf_counter_output(struct perf_counter *counter, */ time = sched_clock(); - header.type |= __PERF_EVENT_TIME; + header.type |= PERF_RECORD_TIME; header.size += sizeof(u64); } -- cgit v0.10.2 From 808382b33bb4c60df6379ec2db39f332cc56b82a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:28 +0200 Subject: perf_counter: kerneltop: keep up with ABI changes Update kerneltop to use PERF_EVENT_MISC_OVERFLOW Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130408.947197470@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c index 15f3a5f..042c1b8 100644 --- a/Documentation/perf_counter/kerneltop.c +++ b/Documentation/perf_counter/kerneltop.c @@ -1277,22 +1277,22 @@ static void mmap_read(struct mmap_data *md) old += size; - switch (event->header.type) { - case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP: - case PERF_EVENT_COUNTER_OVERFLOW | __PERF_EVENT_IP | __PERF_EVENT_TID: - process_event(event->ip.ip, md->counter); - break; - - case PERF_EVENT_MMAP: - case PERF_EVENT_MUNMAP: - printf("%s: %Lu %Lu %Lu %s\n", - event->header.type == PERF_EVENT_MMAP - ? "mmap" : "munmap", - event->mmap.start, - event->mmap.len, - event->mmap.pgoff, - event->mmap.filename); - break; + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { + if (event->header.type & PERF_RECORD_IP) + process_event(event->ip.ip, md->counter); + } else { + switch (event->header.type) { + case PERF_EVENT_MMAP: + case PERF_EVENT_MUNMAP: + printf("%s: %Lu %Lu %Lu %s\n", + event->header.type == PERF_EVENT_MMAP + ? "mmap" : "munmap", + event->mmap.start, + event->mmap.len, + event->mmap.pgoff, + event->mmap.filename); + break; + } } } -- cgit v0.10.2 From 8740f9418c78dcad694b46ab25d1645d5aef1f5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:29 +0200 Subject: perf_counter: add some comments Add a few comments because I was forgetting what field what for what functionality. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.036984214@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 4809ae1..8bf764fc 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -344,10 +344,12 @@ struct file; struct perf_mmap_data { struct rcu_head rcu_head; - int nr_pages; - atomic_t wakeup; - atomic_t head; - atomic_t events; + int nr_pages; /* nr of data pages */ + + atomic_t wakeup; /* POLL_ for wakeups */ + atomic_t head; /* write position */ + atomic_t events; /* event limit */ + struct perf_counter_mmap_page *user_page; void *data_pages[0]; }; -- cgit v0.10.2 From 8d1b2d9361b494bfc761700c348c65ebbe3deb5b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:30 +0200 Subject: perf_counter: track task-comm data Similar to the mmap data stream, add one that tracks the task COMM field, so that the userspace reporting knows what to call a task. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.127422406@chello.nl> Signed-off-by: Ingo Molnar diff --git a/fs/exec.c b/fs/exec.c index e015c0b..bf47ed0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -951,6 +951,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) task_lock(tsk); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); + perf_counter_comm(tsk); } int flush_old_exec(struct linux_binprm * bprm) diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8bf764fc..a70a55f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -142,8 +142,9 @@ struct perf_counter_hw_event { exclude_idle : 1, /* don't count when idle */ mmap : 1, /* include mmap data */ munmap : 1, /* include munmap data */ + comm : 1, /* include comm data */ - __reserved_1 : 53; + __reserved_1 : 52; __u32 extra_config_len; __u32 wakeup_events; /* wakeup every n events */ @@ -231,6 +232,16 @@ enum perf_event_type { PERF_EVENT_MUNMAP = 2, /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * }; + */ + PERF_EVENT_COMM = 3, + + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_RECORD_* * @@ -545,6 +556,8 @@ extern void perf_counter_mmap(unsigned long addr, unsigned long len, extern void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); +extern void perf_counter_comm(struct task_struct *tsk); + #define MAX_STACK_DEPTH 255 struct perf_callchain_entry { @@ -583,6 +596,7 @@ static inline void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { } +static inline void perf_counter_comm(struct task_struct *tsk) { } #endif #endif /* __KERNEL__ */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index bf12df6..2d4aebb 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1917,6 +1917,99 @@ static void perf_counter_output(struct perf_counter *counter, } /* + * comm tracking + */ + +struct perf_comm_event { + struct task_struct *task; + char *comm; + int comm_size; + + struct { + struct perf_event_header header; + + u32 pid; + u32 tid; + } event; +}; + +static void perf_counter_comm_output(struct perf_counter *counter, + struct perf_comm_event *comm_event) +{ + struct perf_output_handle handle; + int size = comm_event->event.header.size; + int ret = perf_output_begin(&handle, counter, size, 0, 0); + + if (ret) + return; + + perf_output_put(&handle, comm_event->event); + perf_output_copy(&handle, comm_event->comm, + comm_event->comm_size); + perf_output_end(&handle); +} + +static int perf_counter_comm_match(struct perf_counter *counter, + struct perf_comm_event *comm_event) +{ + if (counter->hw_event.comm && + comm_event->event.header.type == PERF_EVENT_COMM) + return 1; + + return 0; +} + +static void perf_counter_comm_ctx(struct perf_counter_context *ctx, + struct perf_comm_event *comm_event) +{ + struct perf_counter *counter; + + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { + if (perf_counter_comm_match(counter, comm_event)) + perf_counter_comm_output(counter, comm_event); + } + rcu_read_unlock(); +} + +static void perf_counter_comm_event(struct perf_comm_event *comm_event) +{ + struct perf_cpu_context *cpuctx; + unsigned int size; + char *comm = comm_event->task->comm; + + size = ALIGN(strlen(comm), sizeof(u64)); + + comm_event->comm = comm; + comm_event->comm_size = size; + + comm_event->event.header.size = sizeof(comm_event->event) + size; + + cpuctx = &get_cpu_var(perf_cpu_context); + perf_counter_comm_ctx(&cpuctx->ctx, comm_event); + put_cpu_var(perf_cpu_context); + + perf_counter_comm_ctx(¤t->perf_counter_ctx, comm_event); +} + +void perf_counter_comm(struct task_struct *task) +{ + struct perf_comm_event comm_event = { + .task = task, + .event = { + .header = { .type = PERF_EVENT_COMM, }, + .pid = task->group_leader->pid, + .tid = task->pid, + }, + }; + + perf_counter_comm_event(&comm_event); +} + +/* * mmap tracking */ -- cgit v0.10.2 From de9ac07bbf8f51e0ce40e5428c3a8f627bd237c2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:31 +0200 Subject: perf_counter: some simple userspace profiling # perf-record make -j4 kernel/ # perf-report | tail -15 0.39 cc1 [kernel] lock_acquired 0.42 cc1 [kernel] lock_acquire 0.51 cc1 [ user ] /lib64/libc-2.8.90.so: _int_free 0.51 as [kernel] clear_page_c 0.53 cc1 [ user ] /lib64/libc-2.8.90.so: memcpy 0.56 cc1 [ user ] /lib64/libc-2.8.90.so: _IO_vfprintf 0.63 cc1 [kernel] lock_release 0.67 cc1 [ user ] /lib64/libc-2.8.90.so: strlen 0.68 cc1 [kernel] debug_smp_processor_id 1.38 cc1 [ user ] /lib64/libc-2.8.90.so: _int_malloc 1.55 cc1 [ user ] /lib64/libc-2.8.90.so: memset 1.77 cc1 [kernel] __lock_acquire 1.88 cc1 [kernel] clear_page_c 3.61 as [ user ] /usr/bin/as: 59.16 cc1 [ user ] /usr/libexec/gcc/x86_64-redhat-linux/4.3.2/cc1: Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford Cc: Arnaldo Carvalho de Melo LKML-Reference: <20090408130409.220518450@chello.nl> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 194b662..1dd37ee 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -1,10 +1,16 @@ -BINS = kerneltop perfstat +BINS = kerneltop perfstat perf-record perf-report all: $(BINS) kerneltop: kerneltop.c ../../include/linux/perf_counter.h cc -O6 -Wall -lrt -o $@ $< +perf-record: perf-record.c ../../include/linux/perf_counter.h + cc -O6 -Wall -lrt -o $@ $< + +perf-report: perf-report.cc ../../include/linux/perf_counter.h + g++ -O6 -Wall -lrt -o $@ $< + perfstat: kerneltop ln -sf kerneltop perfstat diff --git a/Documentation/perf_counter/perf-record.c b/Documentation/perf_counter/perf-record.c new file mode 100644 index 0000000..614de7c --- /dev/null +++ b/Documentation/perf_counter/perf-record.c @@ -0,0 +1,530 @@ + + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../include/linux/perf_counter.h" + + +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +#ifdef __x86_64__ +#define __NR_perf_counter_open 295 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __i386__ +#define __NR_perf_counter_open 333 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#define rmb() asm volatile ("sync" ::: "memory") +#define cpu_relax() asm volatile ("" ::: "memory"); +#endif + +#define unlikely(x) __builtin_expect(!!(x), 0) +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +asmlinkage int sys_perf_counter_open( + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ + return syscall( + __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); +} + +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) + +static int nr_counters = 0; +static __u64 event_id[MAX_COUNTERS] = { }; +static int default_interval = 100000; +static int event_count[MAX_COUNTERS]; +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; +static int nr_cpus = 0; +static unsigned int page_size; +static unsigned int mmap_pages = 16; +static int output; +static char *output_name = "output.perf"; +static int group = 0; +static unsigned int realtime_prio = 0; + +const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +static char *hw_event_names[] = { + "CPU cycles", + "instructions", + "cache references", + "cache misses", + "branches", + "branch misses", + "bus cycles", +}; + +static char *sw_event_names[] = { + "cpu clock ticks", + "task clock ticks", + "pagefaults", + "context switches", + "CPU migrations", + "minor faults", + "major faults", +}; + +struct event_symbol { + __u64 event; + char *symbol; +}; + +static struct event_symbol event_symbols[] = { + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, + + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, +}; + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static __u64 match_event_symbols(char *str) +{ + __u64 config, id; + int type; + unsigned int i; + + if (sscanf(str, "r%llx", &config) == 1) + return config | PERF_COUNTER_RAW_MASK; + + if (sscanf(str, "%d:%llu", &type, &id) == 2) + return EID(type, id); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return ~0ULL; +} + +static int parse_events(char *str) +{ + __u64 config; + +again: + if (nr_counters == MAX_COUNTERS) + return -1; + + config = match_event_symbols(str); + if (config == ~0ULL) + return -1; + + event_id[nr_counters] = config; + nr_counters++; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } + + return 0; +} + +#define __PERF_COUNTER_FIELD(config, name) \ + ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) +#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) +#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) +#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) + +static void display_events_help(void) +{ + unsigned int i; + __u64 e; + + printf( + " -e EVENT --event=EVENT # symbolic-name abbreviations"); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + int type, id; + + e = event_symbols[i].event; + type = PERF_COUNTER_TYPE(e); + id = PERF_COUNTER_ID(e); + + printf("\n %d:%d: %-20s", + type, id, event_symbols[i].symbol); + } + + printf("\n" + " rNNN: raw PMU events (eventsel+umask)\n\n"); +} + +static void display_help(void) +{ + printf( + "Usage: perf-record []\n" + "perf-record Options (up to %d event types can be specified at once):\n\n", + MAX_COUNTERS); + + display_events_help(); + + printf( + " -c CNT --count=CNT # event period to sample\n" + " -m pages --mmap_pages= # number of mmap data pages\n" + " -o file --output= # output file\n" + " -r prio --realtime= # use RT prio\n" + ); + + exit(0); +} + +static void process_options(int argc, char *argv[]) +{ + int error = 0, counter; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"count", required_argument, NULL, 'c'}, + {"event", required_argument, NULL, 'e'}, + {"mmap_pages", required_argument, NULL, 'm'}, + {"output", required_argument, NULL, 'o'}, + {"realtime", required_argument, NULL, 'r'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:c:e:m:o:r:", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'c': default_interval = atoi(optarg); break; + case 'e': error = parse_events(optarg); break; + case 'm': mmap_pages = atoi(optarg); break; + case 'o': output_name = strdup(optarg); break; + case 'r': realtime_prio = atoi(optarg); break; + default: error = 1; break; + } + } + if (error) + display_help(); + + if (!nr_counters) { + nr_counters = 1; + event_id[0] = 0; + } + + for (counter = 0; counter < nr_counters; counter++) { + if (event_count[counter]) + continue; + + event_count[counter] = default_interval; + } +} + +struct mmap_data { + int counter; + void *base; + unsigned int mask; + unsigned int prev; +}; + +static unsigned int mmap_read_head(struct mmap_data *md) +{ + struct perf_counter_mmap_page *pc = md->base; + int head; + + head = pc->data_head; + rmb(); + + return head; +} + +static long events; +static struct timeval last_read, this_read; + +static void mmap_read(struct mmap_data *md) +{ + unsigned int head = mmap_read_head(md); + unsigned int old = md->prev; + unsigned char *data = md->base + page_size; + unsigned long size; + void *buf; + int diff; + + gettimeofday(&this_read, NULL); + + /* + * If we're further behind than half the buffer, there's a chance + * the writer will bite our tail and screw up the events under us. + * + * If we somehow ended up ahead of the head, we got messed up. + * + * In either case, truncate and restart at head. + */ + diff = head - old; + if (diff > md->mask / 2 || diff < 0) { + struct timeval iv; + unsigned long msecs; + + timersub(&this_read, &last_read, &iv); + msecs = iv.tv_sec*1000 + iv.tv_usec/1000; + + fprintf(stderr, "WARNING: failed to keep up with mmap data." + " Last read %lu msecs ago.\n", msecs); + + /* + * head points to a known good entry, start there. + */ + old = head; + } + + last_read = this_read; + + if (old != head) + events++; + + size = head - old; + + if ((old & md->mask) + size != (head & md->mask)) { + buf = &data[old & md->mask]; + size = md->mask + 1 - (old & md->mask); + old += size; + while (size) { + int ret = write(output, buf, size); + if (ret < 0) { + perror("failed to write"); + exit(-1); + } + size -= ret; + buf += ret; + } + } + + buf = &data[old & md->mask]; + size = head - old; + old += size; + while (size) { + int ret = write(output, buf, size); + if (ret < 0) { + perror("failed to write"); + exit(-1); + } + size -= ret; + buf += ret; + } + + md->prev = old; +} + +static volatile int done = 0; + +static void sigchld_handler(int sig) +{ + if (sig == SIGCHLD) + done = 1; +} + +int main(int argc, char *argv[]) +{ + struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; + struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; + struct perf_counter_hw_event hw_event; + int i, counter, group_fd, nr_poll = 0; + pid_t pid; + int ret; + + page_size = sysconf(_SC_PAGE_SIZE); + + process_options(argc, argv); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + output = open(output_name, O_CREAT|O_RDWR, S_IRWXU); + if (output < 0) { + perror("failed to create output file"); + exit(-1); + } + + argc -= optind; + argv += optind; + + for (i = 0; i < nr_cpus; i++) { + group_fd = -1; + for (counter = 0; counter < nr_counters; counter++) { + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.config = event_id[counter]; + hw_event.irq_period = event_count[counter]; + hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; + hw_event.nmi = 1; + hw_event.mmap = 1; + hw_event.comm = 1; + + fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0); + if (fd[i][counter] < 0) { + int err = errno; + printf("kerneltop error: syscall returned with %d (%s)\n", + fd[i][counter], strerror(err)); + if (err == EPERM) + printf("Are you root?\n"); + exit(-1); + } + assert(fd[i][counter] >= 0); + fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); + + /* + * First counter acts as the group leader: + */ + if (group && group_fd == -1) + group_fd = fd[i][counter]; + + event_array[nr_poll].fd = fd[i][counter]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[i][counter].counter = counter; + mmap_array[i][counter].prev = 0; + mmap_array[i][counter].mask = mmap_pages*page_size - 1; + mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ, MAP_SHARED, fd[i][counter], 0); + if (mmap_array[i][counter].base == MAP_FAILED) { + printf("kerneltop error: failed to mmap with %d (%s)\n", + errno, strerror(errno)); + exit(-1); + } + } + } + + signal(SIGCHLD, sigchld_handler); + + pid = fork(); + if (pid < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], argv)) { + perror(argv[0]); + exit(-1); + } + } + + if (realtime_prio) { + struct sched_param param; + + param.sched_priority = realtime_prio; + if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { + printf("Could not set realtime priority.\n"); + exit(-1); + } + } + + /* + * TODO: store the current /proc/$/maps information somewhere + */ + + while (!done) { + int hits = events; + + for (i = 0; i < nr_cpus; i++) { + for (counter = 0; counter < nr_counters; counter++) + mmap_read(&mmap_array[i][counter]); + } + + if (hits == events) + ret = poll(event_array, nr_poll, 100); + } + + return 0; +} diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc new file mode 100644 index 0000000..09da0ba --- /dev/null +++ b/Documentation/perf_counter/perf-report.cc @@ -0,0 +1,472 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../include/linux/perf_counter.h" + +#include +#include +#include + + +static char const *input_name = "output.perf"; +static int input; + +static unsigned long page_size; +static unsigned long mmap_window = 32; + +struct ip_event { + struct perf_event_header header; + __u64 ip; + __u32 pid, tid; +}; +struct mmap_event { + struct perf_event_header header; + __u32 pid, tid; + __u64 start; + __u64 len; + __u64 pgoff; + char filename[PATH_MAX]; +}; +struct comm_event { + struct perf_event_header header; + __u32 pid,tid; + char comm[16]; +}; + +typedef union event_union { + struct perf_event_header header; + struct ip_event ip; + struct mmap_event mmap; + struct comm_event comm; +} event_t; + +struct section { + uint64_t start; + uint64_t end; + + uint64_t offset; + + std::string name; + + section() { }; + + section(uint64_t stab) : end(stab) { }; + + section(uint64_t start, uint64_t size, uint64_t offset, std::string name) : + start(start), end(start + size), offset(offset), name(name) + { }; + + bool operator < (const struct section &s) const { + return end < s.end; + }; +}; + +typedef std::set sections_t; + +struct symbol { + uint64_t start; + uint64_t end; + + std::string name; + + symbol() { }; + + symbol(uint64_t ip) : start(ip) { } + + symbol(uint64_t start, uint64_t len, std::string name) : + start(start), end(start + len), name(name) + { }; + + bool operator < (const struct symbol &s) const { + return start < s.start; + }; +}; + +typedef std::set symbols_t; + +struct dso { + sections_t sections; + symbols_t syms; +}; + +static std::map dsos; + +static void load_dso_sections(std::string dso_name) +{ + struct dso &dso = dsos[dso_name]; + + std::string cmd = "readelf -DSW " + dso_name; + + FILE *file = popen(cmd.c_str(), "r"); + if (!file) { + perror("failed to open pipe"); + exit(-1); + } + + char *line = NULL; + size_t n = 0; + + while (!feof(file)) { + uint64_t addr, off, size; + char name[32]; + + if (getline(&line, &n, file) < 0) + break; + if (!line) + break; + + if (sscanf(line, " [%*2d] %16s %*14s %Lx %Lx %Lx", + name, &addr, &off, &size) == 4) { + + dso.sections.insert(section(addr, size, addr - off, name)); + } +#if 0 + /* + * for reading readelf symbols (-s), however these don't seem + * to include nearly everything, so use nm for that. + */ + if (sscanf(line, " %*4d %*3d: %Lx %5Lu %*7s %*6s %*7s %3d %s", + &start, &size, §ion, sym) == 4) { + + start -= dso.section_offsets[section]; + + dso.syms.insert(symbol(start, size, std::string(sym))); + } +#endif + } + pclose(file); +} + +static void load_dso_symbols(std::string dso_name, std::string args) +{ + struct dso &dso = dsos[dso_name]; + + std::string cmd = "nm -nSC " + args + " " + dso_name; + + FILE *file = popen(cmd.c_str(), "r"); + if (!file) { + perror("failed to open pipe"); + exit(-1); + } + + char *line = NULL; + size_t n = 0; + + while (!feof(file)) { + uint64_t start, size; + char c; + char sym[1024]; + + if (getline(&line, &n, file) < 0) + break; + if (!line) + break; + + + if (sscanf(line, "%Lx %Lx %c %s", &start, &size, &c, sym) == 4) { + sections_t::const_iterator si = + dso.sections.upper_bound(section(start)); + if (si == dso.sections.end()) { + printf("symbol in unknown section: %s\n", sym); + continue; + } + + start -= si->offset; + + dso.syms.insert(symbol(start, size, sym)); + } + } + pclose(file); +} + +static void load_dso(std::string dso_name) +{ + load_dso_sections(dso_name); + load_dso_symbols(dso_name, "-D"); /* dynamic symbols */ + load_dso_symbols(dso_name, ""); /* regular ones */ +} + +void load_kallsyms(void) +{ + struct dso &dso = dsos["[kernel]"]; + + FILE *file = fopen("/proc/kallsyms", "r"); + if (!file) { + perror("failed to open kallsyms"); + exit(-1); + } + + char *line; + size_t n; + + while (!feof(file)) { + uint64_t start; + char c; + char sym[1024]; + + if (getline(&line, &n, file) < 0) + break; + if (!line) + break; + + if (sscanf(line, "%Lx %c %s", &start, &c, sym) == 3) + dso.syms.insert(symbol(start, 0x1000000, std::string(sym))); + } + fclose(file); +} + +struct map { + uint64_t start; + uint64_t end; + uint64_t pgoff; + + std::string dso; + + map() { }; + + map(uint64_t ip) : end(ip) { } + + map(mmap_event *mmap) { + start = mmap->start; + end = mmap->start + mmap->len; + pgoff = mmap->pgoff; + + dso = std::string(mmap->filename); + + if (dsos.find(dso) == dsos.end()) + load_dso(dso); + }; + + bool operator < (const struct map &m) const { + return end < m.end; + }; +}; + +typedef std::set maps_t; + +static std::map maps; + +static std::map comms; + +static std::map hist; +static std::multimap rev_hist; + +static std::string resolve_comm(int pid) +{ + std::string comm = ""; + std::map::const_iterator ci = comms.find(pid); + if (ci != comms.end()) + comm = ci->second; + + return comm; +} + +static std::string resolve_user_symbol(int pid, uint64_t ip) +{ + std::string sym = ""; + + maps_t &m = maps[pid]; + maps_t::const_iterator mi = m.upper_bound(map(ip)); + if (mi == m.end()) + return sym; + + ip -= mi->start + mi->pgoff; + + symbols_t &s = dsos[mi->dso].syms; + symbols_t::const_iterator si = s.upper_bound(symbol(ip)); + + sym = mi->dso + ": "; + + if (si == s.begin()) + return sym; + si--; + + if (si->start <= ip && ip < si->end) + sym = mi->dso + ": " + si->name; +#if 0 + else if (si->start <= ip) + sym = mi->dso + ": ?" + si->name; +#endif + + return sym; +} + +static std::string resolve_kernel_symbol(uint64_t ip) +{ + std::string sym = ""; + + symbols_t &s = dsos["[kernel]"].syms; + symbols_t::const_iterator si = s.upper_bound(symbol(ip)); + + if (si == s.begin()) + return sym; + si--; + + if (si->start <= ip && ip < si->end) + sym = si->name; + + return sym; +} + +static void display_help(void) +{ + printf( + "Usage: perf-report []\n" + " -i file --input= # input file\n" + ); + + exit(0); +} + +static void process_options(int argc, char *argv[]) +{ + int error = 0; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"input", required_argument, NULL, 'i'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:i:", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'i': input_name = strdup(optarg); break; + default: error = 1; break; + } + } + + if (error) + display_help(); +} + +int main(int argc, char *argv[]) +{ + unsigned long offset = 0; + unsigned long head = 0; + struct stat stat; + char *buf; + event_t *event; + int ret; + unsigned long total = 0; + + page_size = getpagesize(); + + process_options(argc, argv); + + input = open(input_name, O_RDONLY); + if (input < 0) { + perror("failed to open file"); + exit(-1); + } + + ret = fstat(input, &stat); + if (ret < 0) { + perror("failed to stat file"); + exit(-1); + } + + load_kallsyms(); + +remap: + buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, + MAP_SHARED, input, offset); + if (buf == MAP_FAILED) { + perror("failed to mmap file"); + exit(-1); + } + +more: + event = (event_t *)(buf + head); + + if (head + event->header.size >= page_size * mmap_window) { + unsigned long shift = page_size * (head / page_size); + + munmap(buf, page_size * mmap_window); + offset += shift; + head -= shift; + goto remap; + } + head += event->header.size; + + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { + std::string comm, sym, level; + char output[1024]; + + if (event->header.misc & PERF_EVENT_MISC_KERNEL) { + level = "[kernel]"; + sym = resolve_kernel_symbol(event->ip.ip); + } else if (event->header.misc & PERF_EVENT_MISC_USER) { + level = "[ user ]"; + sym = resolve_user_symbol(event->ip.pid, event->ip.ip); + } else { + level = "[ hv ]"; + } + comm = resolve_comm(event->ip.pid); + + snprintf(output, sizeof(output), "%16s %s %s", + comm.c_str(), level.c_str(), sym.c_str()); + hist[output]++; + + total++; + + } else switch (event->header.type) { + case PERF_EVENT_MMAP: + maps[event->mmap.pid].insert(map(&event->mmap)); + break; + + case PERF_EVENT_COMM: + comms[event->comm.pid] = std::string(event->comm.comm); + break; + } + + if (offset + head < stat.st_size) + goto more; + + close(input); + + std::map::iterator hi = hist.begin(); + + while (hi != hist.end()) { + rev_hist.insert(std::pair(hi->second, hi->first)); + hist.erase(hi++); + } + + std::multimap::const_iterator ri = rev_hist.begin(); + + while (ri != rev_hist.end()) { + printf(" %5.2f %s\n", (100.0 * ri->first)/total, ri->second.c_str()); + ri++; + } + + return 0; +} + -- cgit v0.10.2 From 4d855457d84b819fefcd1cd1b0a2a0a0ec475c07 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:32 +0200 Subject: perf_counter: move PERF_RECORD_TIME Move PERF_RECORD_TIME so that all the fixed length items come before the variable length ones. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.307926436@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a70a55f..8bd1be5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -100,9 +100,9 @@ enum sw_event_ids { enum perf_counter_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, - PERF_RECORD_GROUP = 1U << 2, - PERF_RECORD_CALLCHAIN = 1U << 3, - PERF_RECORD_TIME = 1U << 4, + PERF_RECORD_TIME = 1U << 2, + PERF_RECORD_GROUP = 1U << 3, + PERF_RECORD_CALLCHAIN = 1U << 4, }; /* @@ -250,6 +250,7 @@ enum perf_event_type { * * { u64 ip; } && PERF_RECORD_IP * { u32 pid, tid; } && PERF_RECORD_TID + * { u64 time; } && PERF_RECORD_TIME * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP @@ -259,8 +260,6 @@ enum perf_event_type { * kernel, * user; * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN - * - * { u64 time; } && PERF_RECORD_TIME * }; */ }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 2d4aebb..4dc8600d2 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1850,6 +1850,16 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(tid_entry); } + if (record_type & PERF_RECORD_TIME) { + /* + * Maybe do better on x86 and provide cpu_clock_nmi() + */ + time = sched_clock(); + + header.type |= PERF_RECORD_TIME; + header.size += sizeof(u64); + } + if (record_type & PERF_RECORD_GROUP) { header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + @@ -1867,16 +1877,6 @@ static void perf_counter_output(struct perf_counter *counter, } } - if (record_type & PERF_RECORD_TIME) { - /* - * Maybe do better on x86 and provide cpu_clock_nmi() - */ - time = sched_clock(); - - header.type |= PERF_RECORD_TIME; - header.size += sizeof(u64); - } - ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -1889,6 +1889,9 @@ static void perf_counter_output(struct perf_counter *counter, if (record_type & PERF_RECORD_TID) perf_output_put(&handle, tid_entry); + if (record_type & PERF_RECORD_TIME) + perf_output_put(&handle, time); + if (record_type & PERF_RECORD_GROUP) { struct perf_counter *leader, *sub; u64 nr = counter->nr_siblings; @@ -1910,9 +1913,6 @@ static void perf_counter_output(struct perf_counter *counter, if (callchain) perf_output_copy(&handle, callchain, callchain_size); - if (record_type & PERF_RECORD_TIME) - perf_output_put(&handle, time); - perf_output_end(&handle); } -- cgit v0.10.2 From 78f13e9525ba777da25c4ddab89f28e9366a8b7c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Apr 2009 15:01:33 +0200 Subject: perf_counter: allow for data addresses to be recorded Paul suggested we allow for data addresses to be recorded along with the traditional IPs as power can provide these. For now, only the software pagefault events provide data addresses, but in the future power might as well for some events. x86 doesn't seem capable of providing this atm. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090408130409.394816925@chello.nl> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index 0697ade..c9d019f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_overflow(counter, 1, regs); + perf_counter_overflow(counter, 1, regs, 0); } /* diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 17bbf6f..ac0e112 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -312,7 +312,8 @@ good_area: } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); @@ -322,7 +323,8 @@ good_area: #endif } else { current->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + regs, address); } up_read(&mm->mmap_sem); return 0; diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 1116a41..0fcbaab 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -800,7 +800,7 @@ again: continue; perf_save_and_restart(counter); - if (perf_counter_overflow(counter, nmi, regs)) + if (perf_counter_overflow(counter, nmi, regs, 0)) __pmc_generic_disable(counter, &counter->hw, bit); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f2d3324..6f9df2b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address); /* * If we're in an interrupt, have no user context or are running @@ -1142,10 +1142,12 @@ good_area: if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, + regs, address); } else { tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, + regs, address); } check_v8086_mode(regs, address, tsk); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 8bd1be5..c22363a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -101,8 +101,9 @@ enum perf_counter_record_format { PERF_RECORD_IP = 1U << 0, PERF_RECORD_TID = 1U << 1, PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_GROUP = 1U << 3, - PERF_RECORD_CALLCHAIN = 1U << 4, + PERF_RECORD_ADDR = 1U << 3, + PERF_RECORD_GROUP = 1U << 4, + PERF_RECORD_CALLCHAIN = 1U << 5, }; /* @@ -251,6 +252,7 @@ enum perf_event_type { * { u64 ip; } && PERF_RECORD_IP * { u32 pid, tid; } && PERF_RECORD_TID * { u64 time; } && PERF_RECORD_TIME + * { u64 addr; } && PERF_RECORD_ADDR * * { u64 nr; * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP @@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader, extern void perf_counter_update_userpage(struct perf_counter *counter); extern int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs); + int nmi, struct pt_regs *regs, u64 addr); /* * Return 1 for a software counter, 0 for a hardware counter */ @@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter) perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; } -extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); +extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); extern void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file); @@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; } static inline int perf_counter_task_enable(void) { return -EINVAL; } static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) { } - +perf_swcounter_event(u32 event, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { } static inline void perf_counter_mmap(unsigned long addr, unsigned long len, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 4dc8600d2..321c57e 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -800,7 +800,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu) update_context_time(ctx); regs = task_pt_regs(task); - perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); + perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0); __perf_counter_sched_out(ctx, cpuctx); cpuctx->task_ctx = NULL; @@ -1810,7 +1810,7 @@ static void perf_output_end(struct perf_output_handle *handle) } static void perf_counter_output(struct perf_counter *counter, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { int ret; u64 record_type = counter->hw_event.record_type; @@ -1860,6 +1860,11 @@ static void perf_counter_output(struct perf_counter *counter, header.size += sizeof(u64); } + if (record_type & PERF_RECORD_ADDR) { + header.type |= PERF_RECORD_ADDR; + header.size += sizeof(u64); + } + if (record_type & PERF_RECORD_GROUP) { header.type |= PERF_RECORD_GROUP; header.size += sizeof(u64) + @@ -1892,6 +1897,9 @@ static void perf_counter_output(struct perf_counter *counter, if (record_type & PERF_RECORD_TIME) perf_output_put(&handle, time); + if (record_type & PERF_RECORD_ADDR) + perf_output_put(&handle, addr); + if (record_type & PERF_RECORD_GROUP) { struct perf_counter *leader, *sub; u64 nr = counter->nr_siblings; @@ -2158,7 +2166,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len, */ int perf_counter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { int events = atomic_read(&counter->event_limit); int ret = 0; @@ -2175,7 +2183,7 @@ int perf_counter_overflow(struct perf_counter *counter, perf_counter_disable(counter); } - perf_counter_output(counter, nmi, regs); + perf_counter_output(counter, nmi, regs, addr); return ret; } @@ -2240,7 +2248,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) regs = task_pt_regs(current); if (regs) { - if (perf_counter_overflow(counter, 0, regs)) + if (perf_counter_overflow(counter, 0, regs, 0)) ret = HRTIMER_NORESTART; } @@ -2250,11 +2258,11 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) } static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { perf_swcounter_update(counter); perf_swcounter_set_period(counter); - if (perf_counter_overflow(counter, nmi, regs)) + if (perf_counter_overflow(counter, nmi, regs, addr)) /* soft-disable the counter */ ; @@ -2286,16 +2294,17 @@ static int perf_swcounter_match(struct perf_counter *counter, } static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct pt_regs *regs) + int nmi, struct pt_regs *regs, u64 addr) { int neg = atomic64_add_negative(nr, &counter->hw.count); if (counter->hw.irq_period && !neg) - perf_swcounter_overflow(counter, nmi, regs); + perf_swcounter_overflow(counter, nmi, regs, addr); } static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, enum perf_event_types type, u32 event, - u64 nr, int nmi, struct pt_regs *regs) + u64 nr, int nmi, struct pt_regs *regs, + u64 addr) { struct perf_counter *counter; @@ -2305,7 +2314,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { if (perf_swcounter_match(counter, type, event, regs)) - perf_swcounter_add(counter, nr, nmi, regs); + perf_swcounter_add(counter, nr, nmi, regs, addr); } rcu_read_unlock(); } @@ -2325,7 +2334,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) } static void __perf_swcounter_event(enum perf_event_types type, u32 event, - u64 nr, int nmi, struct pt_regs *regs) + u64 nr, int nmi, struct pt_regs *regs, + u64 addr) { struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); int *recursion = perf_swcounter_recursion_context(cpuctx); @@ -2336,10 +2346,11 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event, (*recursion)++; barrier(); - perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); + perf_swcounter_ctx_event(&cpuctx->ctx, type, event, + nr, nmi, regs, addr); if (cpuctx->task_ctx) { perf_swcounter_ctx_event(cpuctx->task_ctx, type, event, - nr, nmi, regs); + nr, nmi, regs, addr); } barrier(); @@ -2349,9 +2360,10 @@ out: put_cpu_var(perf_cpu_context); } -void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) +void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs); + __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); } static void perf_swcounter_read(struct perf_counter *counter) @@ -2548,7 +2560,7 @@ void perf_tpcounter_event(int event_id) if (!regs) regs = task_pt_regs(current); - __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs); + __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); } extern int ftrace_profile_enable(int); -- cgit v0.10.2 From 1c1452be2e9ae282a7316c3b23987811bd7acda6 Mon Sep 17 00:00:00 2001 From: Jonas Larsson Date: Tue, 31 Mar 2009 11:16:48 +0200 Subject: atmel-mci: Add support for inverted detect pin Same patch as before, modified to use bool. Also adds description of the new field in struct atmel_mci that I missed in the first patch. This patch adds Atmel MCI support for inverted detect pins. Signed-off-by: Jonas Larsson Acked-by: Pierre Ossman Signed-off-by: Haavard Skinnemoen diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index cf6a100..7b603e4 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -177,6 +177,7 @@ struct atmel_mci { * available. * @wp_pin: GPIO pin used for card write protect sending, or negative * if not available. + * @detect_is_active_high: The state of the detect pin when it is active. * @detect_timer: Timer used for debouncing @detect_pin interrupts. */ struct atmel_mci_slot { @@ -196,6 +197,7 @@ struct atmel_mci_slot { int detect_pin; int wp_pin; + bool detect_is_active_high; struct timer_list detect_timer; }; @@ -924,7 +926,8 @@ static int atmci_get_cd(struct mmc_host *mmc) struct atmel_mci_slot *slot = mmc_priv(mmc); if (gpio_is_valid(slot->detect_pin)) { - present = !gpio_get_value(slot->detect_pin); + present = !(gpio_get_value(slot->detect_pin) ^ + slot->detect_is_active_high); dev_dbg(&mmc->class_dev, "card is %spresent\n", present ? "" : "not "); } @@ -1028,7 +1031,8 @@ static void atmci_detect_change(unsigned long data) return; enable_irq(gpio_to_irq(slot->detect_pin)); - present = !gpio_get_value(slot->detect_pin); + present = !(gpio_get_value(slot->detect_pin) ^ + slot->detect_is_active_high); present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags); dev_vdbg(&slot->mmc->class_dev, "detect change: %d (was %d)\n", @@ -1456,6 +1460,7 @@ static int __init atmci_init_slot(struct atmel_mci *host, slot->host = host; slot->detect_pin = slot_data->detect_pin; slot->wp_pin = slot_data->wp_pin; + slot->detect_is_active_high = slot_data->detect_is_active_high; slot->sdc_reg = sdc_reg; mmc->ops = &atmci_ops; @@ -1477,7 +1482,8 @@ static int __init atmci_init_slot(struct atmel_mci *host, if (gpio_request(slot->detect_pin, "mmc_detect")) { dev_dbg(&mmc->class_dev, "no detect pin available\n"); slot->detect_pin = -EBUSY; - } else if (gpio_get_value(slot->detect_pin)) { + } else if (gpio_get_value(slot->detect_pin) ^ + slot->detect_is_active_high) { clear_bit(ATMCI_CARD_PRESENT, &slot->flags); } } diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h index 2f1f957..57b1846 100644 --- a/include/linux/atmel-mci.h +++ b/include/linux/atmel-mci.h @@ -10,6 +10,7 @@ * @bus_width: Number of data lines wired up the slot * @detect_pin: GPIO pin wired to the card detect switch * @wp_pin: GPIO pin wired to the write protect sensor + * @detect_is_active_high: The state of the detect pin when it is active * * If a given slot is not present on the board, @bus_width should be * set to 0. The other fields are ignored in this case. @@ -24,6 +25,7 @@ struct mci_slot_pdata { unsigned int bus_width; int detect_pin; int wp_pin; + bool detect_is_active_high; }; /** -- cgit v0.10.2 From 7ebcfcf19723254ebe90cdf8718436b9955a9a01 Mon Sep 17 00:00:00 2001 From: Jonas Larsson Date: Tue, 31 Mar 2009 11:16:52 +0200 Subject: avr32: Solves problem with inverted MCI detect pin on Merisc board Same patch as before, modified to use bool. This patch solves the problem with the inverted mci detect pin on Merisc boards. Signed-off-by: Jonas Larsson Signed-off-by: Haavard Skinnemoen diff --git a/arch/avr32/boards/merisc/setup.c b/arch/avr32/boards/merisc/setup.c index 20b300c..623b077 100644 --- a/arch/avr32/boards/merisc/setup.c +++ b/arch/avr32/boards/merisc/setup.c @@ -94,9 +94,10 @@ static struct spi_board_info __initdata spi0_board_info[] = { static struct mci_platform_data __initdata mci0_data = { .slot[0] = { - .bus_width = 4, - .detect_pin = GPIO_PIN_PE(19), - .wp_pin = GPIO_PIN_PE(20), + .bus_width = 4, + .detect_pin = GPIO_PIN_PE(19), + .wp_pin = GPIO_PIN_PE(20), + .detect_is_active_high = true, }, }; -- cgit v0.10.2 From e85abf8f432bb2a13733ab7609fbb8e1500af51d Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Wed, 8 Apr 2009 14:07:25 -0700 Subject: x86: consolidate SMP code in io_apic.c Impact: Cleanup Reorganizes the code in arch/x86/kernel/io_apic.c by combining two '#ifdef CONFIG_SMP' regions. In addition to making the code easier to understand the first '#ifdef CONFIG_SMP' region is moved to a location later in the file which will reduce the need for function forward declarations when the code subsequently revised. The only changes other than relocating code to a different position in the file were the removal of the assign_irq_vector() forward declaration which was no longer needed and some line length reduction formatting changes. Signed-off-by: Gary Hade Cc: lcm@us.ibm.com LKML-Reference: <20090408210725.GC11159@us.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 767fe7e..7c9d045 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -518,120 +518,6 @@ static void ioapic_mask_entry(int apic, int pin) spin_unlock_irqrestore(&ioapic_lock, flags); } -#ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) -{ - cpumask_var_t cleanup_mask; - - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } - cfg->move_in_progress = 0; -} - -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - u8 vector = cfg->vector; - - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - -/* - * Either sets desc->affinity to a valid value, and returns - * ->cpu_mask_to_apicid of that, or returns BAD_APICID and - * leaves desc->affinity untouched. - */ -static unsigned int -set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned int irq; - - if (!cpumask_intersects(mask, cpu_online_mask)) - return BAD_APICID; - - irq = desc->irq; - cfg = desc->chip_data; - if (assign_irq_vector(irq, cfg, mask)) - return BAD_APICID; - - /* check that before desc->addinity get updated */ - set_extra_move_desc(desc, mask); - - cpumask_copy(desc->affinity, mask); - - return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); -} - -static void -set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - dest = set_desc_affinity(desc, mask); - if (dest != BAD_APICID) { - /* Only the high 8 bits are valid. */ - dest = SET_APIC_LOGICAL_ID(dest); - __target_IO_APIC_irq(irq, dest, cfg); - } - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void -set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - - set_ioapic_affinity_irq_desc(desc, mask); -} -#endif /* CONFIG_SMP */ - /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -2360,6 +2246,115 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP +static void send_cleanup_vector(struct irq_cfg *cfg) +{ + cpumask_var_t cleanup_mask; + + if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { + unsigned int i; + cfg->move_cleanup_count = 0; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + cfg->move_cleanup_count++; + for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) + apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); + } else { + cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(cleanup_mask); + apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + free_cpumask_var(cleanup_mask); + } + cfg->move_in_progress = 0; +} + +static void +__target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + u8 vector = cfg->vector; + + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +/* + * Either sets desc->affinity to a valid value, and returns + * ->cpu_mask_to_apicid of that, or returns BAD_APICID and + * leaves desc->affinity untouched. + */ +static unsigned int +set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned int irq; + + if (!cpumask_intersects(mask, cpu_online_mask)) + return BAD_APICID; + + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) + return BAD_APICID; + + /* check that before desc->addinity get updated */ + set_extra_move_desc(desc, mask); + + cpumask_copy(desc->affinity, mask); + + return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); +} + +static void +set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + dest = set_desc_affinity(desc, mask); + if (dest != BAD_APICID) { + /* Only the high 8 bits are valid. */ + dest = SET_APIC_LOGICAL_ID(dest); + __target_IO_APIC_irq(irq, dest, cfg); + } + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void +set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + set_ioapic_affinity_irq_desc(desc, mask); +} #ifdef CONFIG_INTR_REMAP -- cgit v0.10.2 From 6c0b324435ff49fb3c68fe808a93853d81c7fb97 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 9 Apr 2009 09:27:37 +1000 Subject: perf_counter: add MAINTAINERS entry This adds an entry in MAINTAINERS for the perf_counter subsystem. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <18909.13033.345975.434902@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/MAINTAINERS b/MAINTAINERS index c3b2159..16fb45a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3483,6 +3483,16 @@ M: balbir@linux.vnet.ibm.com L: linux-kernel@vger.kernel.org S: Maintained +PERFORMANCE COUNTER SUBSYSTEM +P: Peter Zijlstra +M: a.p.zijlstra@chello.nl +P: Paul Mackerras +M: paulus@samba.org +P: Ingo Molnar +M: mingo@elte.hu +L: linux-kernel@vger.kernel.org +S: Supported + PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org -- cgit v0.10.2 From ca8f2d7f019a8547f39ddb9ed0144932f12807f2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 9 Apr 2009 14:42:56 +1000 Subject: perf_counter: powerpc: add nmi_enter/nmi_exit calls Impact: fix potential deadlocks on powerpc Now that the core is using in_nmi() (added in e30e08f6, "perf_counter: fix NMI race in task clock"), we need the powerpc perf_counter_interrupt to call nmi_enter() and nmi_exit() in those cases where the interrupt happens when interrupts are soft-disabled. If interrupts were soft-enabled, we can treat it as a regular interrupt and do irq_enter/irq_exit around the whole routine. This lets us get rid of the test_perf_counter_pending() call at the end of perf_counter_interrupt, thus simplifying things a little. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <18909.31952.873098.336615@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index c9d019f..bd76d0f 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -714,7 +714,7 @@ hw_perf_counter_init(struct perf_counter *counter) * here so there is no possibility of being interrupted. */ static void record_and_restart(struct perf_counter *counter, long val, - struct pt_regs *regs) + struct pt_regs *regs, int nmi) { s64 prev, delta, left; int record = 0; @@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val, * Finally record data if requested. */ if (record) - perf_counter_overflow(counter, 1, regs, 0); + perf_counter_overflow(counter, nmi, regs, 0); } /* @@ -762,6 +762,17 @@ static void perf_counter_interrupt(struct pt_regs *regs) struct perf_counter *counter; long val; int found = 0; + int nmi; + + /* + * If interrupts were soft-disabled when this PMU interrupt + * occurred, treat it as an NMI. + */ + nmi = !regs->softe; + if (nmi) + nmi_enter(); + else + irq_enter(); for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; @@ -769,7 +780,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) if ((int)val < 0) { /* counter has overflowed */ found = 1; - record_and_restart(counter, val, regs); + record_and_restart(counter, val, regs, nmi); } } @@ -796,18 +807,10 @@ static void perf_counter_interrupt(struct pt_regs *regs) */ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); - /* - * If we need a wakeup, check whether interrupts were soft-enabled - * when we took the interrupt. If they were, we can wake stuff up - * immediately; otherwise we'll have do the wakeup when interrupts - * get soft-enabled. - */ - if (test_perf_counter_pending() && regs->softe) { - irq_enter(); - clear_perf_counter_pending(); - perf_counter_do_pending(); + if (nmi) + nmi_exit(); + else irq_exit(); - } } void hw_perf_counter_setup(int cpu) -- cgit v0.10.2 From d2de688891909b148efe83a6fc9520a9cd6015f0 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 9 Apr 2009 15:17:22 +1000 Subject: sparc64: extend TI_RESTART_BLOCK space by 8 bytes Impact: build fix Today's linux-next build (sparc64 defconfig) failed like this: arch/sparc/kernel/built-in.o: In function `trap_init': (.init.text+0x4): undefined reference to `thread_info_offsets_are_bolixed_dave' Caused by commit 52400ba946759af28442dee6265c5c0180ac7122 ("futex: add requeue_pi functionality") (from the tip-core tree) which changed the size of struct restart_block. Shift TI_KUNA_REGS and TI_KUNA_INSN up by 8 bytes to make space for the larger restart block. Signed-off-by: Stephen Rothwell Acked-by: "David S. Miller" Cc: Darren Hart LKML-Reference: <20090409151722.c8eabb56.sfr@canb.auug.org.au> Signed-off-by: Ingo Molnar diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 639ac80..6586572 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -102,8 +102,8 @@ struct thread_info { #define TI_KERN_CNTD1 0x00000488 #define TI_PCR 0x00000490 #define TI_RESTART_BLOCK 0x00000498 -#define TI_KUNA_REGS 0x000004c0 -#define TI_KUNA_INSN 0x000004c8 +#define TI_KUNA_REGS 0x000004c8 +#define TI_KUNA_INSN 0x000004d0 #define TI_FPREGS 0x00000500 /* We embed this in the uppermost byte of thread_info->flags */ -- cgit v0.10.2 From 002f128b473fb82f454654be5081b0919ee01ab2 Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Wed, 8 Apr 2009 15:29:43 -0700 Subject: sched: remove redundant hierarchy walk in check_preempt_wakeup Impact: micro-optimization Under group scheduling we traverse up until we are at common siblings to make the wakeup comparison on. At this point however, they should have the same parent so continuing to check up the tree is redundant. Signed-off-by: Paul Turner Acked-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3816f21..5f9650e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1487,17 +1487,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) find_matching_se(&se, &pse); - while (se) { - BUG_ON(!pse); + BUG_ON(!pse); - if (wakeup_preempt_entity(se, pse) == 1) { - resched_task(curr); - break; - } - - se = parent_entity(se); - pse = parent_entity(pse); - } + if (wakeup_preempt_entity(se, pse) == 1) + resched_task(curr); } static struct task_struct *pick_next_task_fair(struct rq *rq) -- cgit v0.10.2 From 888fcee066a2f4abd0d0bc9418c0535f9b01e6e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Apr 2009 09:48:22 +0200 Subject: perf_counter: fix off task->comm by one strlen() does not include the \0. Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 321c57e..b07195b 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1989,7 +1989,7 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event) unsigned int size; char *comm = comm_event->task->comm; - size = ALIGN(strlen(comm), sizeof(u64)); + size = ALIGN(strlen(comm)+1, sizeof(u64)); comm_event->comm = comm; comm_event->comm_size = size; @@ -2109,7 +2109,7 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) } got_name: - size = ALIGN(strlen(name), sizeof(u64)); + size = ALIGN(strlen(name)+1, sizeof(u64)); mmap_event->file_name = name; mmap_event->file_size = size; -- cgit v0.10.2 From b3828ebb3901adfe989d8d4157ed28247aeec132 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Apr 2009 09:50:04 +0200 Subject: perf_counter tools: include PID in perf-report output, tweak user/kernel printut It's handier than an entry. Also replace the kernel/user column with a more compact version: 0.52 cc1 [k] page_fault 0.57 :0 [k] _spin_lock 0.59 :7506 [.] 0.69 as [.] /usr/bin/as: 0.76 cc1 [.] /lib64/libc-2.8.so: _int_free 0.92 cc1 [k] clear_page_c 1.00 :7465 [.] 1.43 cc1 [.] /lib64/libc-2.8.so: memset 1.86 cc1 [.] /lib64/libc-2.8.so: _int_malloc 70.33 cc1 [.] /usr/libexec/gcc/x86_64-redhat-linux/4.3.2/cc1: Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc index 09da0ba..1727317 100644 --- a/Documentation/perf_counter/perf-report.cc +++ b/Documentation/perf_counter/perf-report.cc @@ -277,10 +277,17 @@ static std::multimap rev_hist; static std::string resolve_comm(int pid) { - std::string comm = ""; + std::string comm; + std::map::const_iterator ci = comms.find(pid); - if (ci != comms.end()) + if (ci != comms.end()) { comm = ci->second; + } else { + char pid_str[30]; + + sprintf(pid_str, ":%d", pid); + comm = pid_str; + } return comm; } @@ -422,13 +429,13 @@ more: char output[1024]; if (event->header.misc & PERF_EVENT_MISC_KERNEL) { - level = "[kernel]"; + level = " [k] "; sym = resolve_kernel_symbol(event->ip.ip); } else if (event->header.misc & PERF_EVENT_MISC_USER) { - level = "[ user ]"; + level = " [.] "; sym = resolve_user_symbol(event->ip.pid, event->ip.ip); } else { - level = "[ hv ]"; + level = " [H] "; } comm = resolve_comm(event->ip.pid); -- cgit v0.10.2 From 9ee318a7825929bc3734110b83ae8e20e53d9de3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Apr 2009 10:53:44 +0200 Subject: perf_counter: optimize mmap/comm tracking Impact: performance optimization The mmap/comm tracking code does quite a lot of work before it discovers there's no interest in it, avoid that by keeping a counter. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090409085524.427173196@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b07195b..76376ec 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -38,6 +38,10 @@ int perf_max_counters __read_mostly = 1; static int perf_reserved_percpu __read_mostly; static int perf_overcommit __read_mostly = 1; +static atomic_t nr_mmap_tracking __read_mostly; +static atomic_t nr_munmap_tracking __read_mostly; +static atomic_t nr_comm_tracking __read_mostly; + /* * Mutex for (sysadmin-configurable) counter reservations: */ @@ -1186,6 +1190,13 @@ static void free_counter(struct perf_counter *counter) { perf_pending_sync(counter); + if (counter->hw_event.mmap) + atomic_dec(&nr_mmap_tracking); + if (counter->hw_event.munmap) + atomic_dec(&nr_munmap_tracking); + if (counter->hw_event.comm) + atomic_dec(&nr_comm_tracking); + if (counter->destroy) counter->destroy(counter); @@ -2005,7 +2016,12 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event) void perf_counter_comm(struct task_struct *task) { - struct perf_comm_event comm_event = { + struct perf_comm_event comm_event; + + if (!atomic_read(&nr_comm_tracking)) + return; + + comm_event = (struct perf_comm_event){ .task = task, .event = { .header = { .type = PERF_EVENT_COMM, }, @@ -2128,7 +2144,12 @@ got_name: void perf_counter_mmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { - struct perf_mmap_event mmap_event = { + struct perf_mmap_event mmap_event; + + if (!atomic_read(&nr_mmap_tracking)) + return; + + mmap_event = (struct perf_mmap_event){ .file = file, .event = { .header = { .type = PERF_EVENT_MMAP, }, @@ -2146,7 +2167,12 @@ void perf_counter_mmap(unsigned long addr, unsigned long len, void perf_counter_munmap(unsigned long addr, unsigned long len, unsigned long pgoff, struct file *file) { - struct perf_mmap_event mmap_event = { + struct perf_mmap_event mmap_event; + + if (!atomic_read(&nr_munmap_tracking)) + return; + + mmap_event = (struct perf_mmap_event){ .file = file, .event = { .header = { .type = PERF_EVENT_MUNMAP, }, @@ -2725,6 +2751,13 @@ done: counter->hw_ops = hw_ops; + if (counter->hw_event.mmap) + atomic_inc(&nr_mmap_tracking); + if (counter->hw_event.munmap) + atomic_inc(&nr_munmap_tracking); + if (counter->hw_event.comm) + atomic_inc(&nr_comm_tracking); + return counter; } -- cgit v0.10.2 From 1ccd15497869f3ed83b5225d410df53a96e52757 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Apr 2009 10:53:45 +0200 Subject: perf_counter: sysctl for system wide perf counters Impact: add sysctl for paranoid/relaxed perfcounters policy Allow the use of system wide perf counters to everybody, but provide a sysctl to disable it for the paranoid security minded. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090409085524.514046352@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c22363a..9814328 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -568,6 +568,8 @@ struct perf_callchain_entry { extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +extern int sysctl_perf_counter_priv; + #else static inline void perf_counter_task_sched_in(struct task_struct *task, int cpu) { } diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 76376ec..7efb7eb 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -42,6 +42,8 @@ static atomic_t nr_mmap_tracking __read_mostly; static atomic_t nr_munmap_tracking __read_mostly; static atomic_t nr_comm_tracking __read_mostly; +int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */ + /* * Mutex for (sysadmin-configurable) counter reservations: */ @@ -1132,7 +1134,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) */ if (cpu != -1) { /* Must be root to operate on a CPU counter: */ - if (!capable(CAP_SYS_ADMIN)) + if (sysctl_perf_counter_priv && !capable(CAP_SYS_ADMIN)) return ERR_PTR(-EACCES); if (cpu < 0 || cpu > num_possible_cpus()) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4286b62..8ba4578 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -920,6 +921,16 @@ static struct ctl_table kern_table[] = { .child = slow_work_sysctls, }, #endif +#ifdef CONFIG_PERF_COUNTERS + { + .ctl_name = CTL_UNNUMBERED, + .procname = "perf_counter_privileged", + .data = &sysctl_perf_counter_priv, + .maxlen = sizeof(sysctl_perf_counter_priv), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt -- cgit v0.10.2 From d3d21c412d8525eb2e208d990ab5eee5fb0fe03d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Apr 2009 10:53:46 +0200 Subject: perf_counter: log full path names Impact: fix perf-report output for /home mounted binaries, etc. dentry_path() only provide path-names up to the mount root, which is unsuited for out purpose, use d_path() instead. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Corey Ashford LKML-Reference: <20090409085524.601794134@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7efb7eb..7f9521c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2116,7 +2116,7 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) name = strncpy(tmp, "//enomem", sizeof(tmp)); goto got_name; } - name = dentry_path(file->f_dentry, buf, PATH_MAX); + name = d_path(&file->f_path, buf, PATH_MAX); if (IS_ERR(name)) { name = strncpy(tmp, "//toolong", sizeof(tmp)); goto got_name; -- cgit v0.10.2 From 894bf92fdec9909fefcfe907786c6c6944a22052 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 9 Apr 2009 12:34:40 +0300 Subject: ASoC: tlv320aic23: add DSP_A format support Add DSP_A interface format support by setting the LRP bit in DSP mode. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c index c3f4afb..21f69df 100644 --- a/sound/soc/codecs/tlv320aic23.c +++ b/sound/soc/codecs/tlv320aic23.c @@ -523,6 +523,8 @@ static int tlv320aic23_set_dai_fmt(struct snd_soc_dai *codec_dai, case SND_SOC_DAIFMT_I2S: iface_reg |= TLV320AIC23_FOR_I2S; break; + case SND_SOC_DAIFMT_DSP_A: + iface_reg |= TLV320AIC23_LRP_ON; case SND_SOC_DAIFMT_DSP_B: iface_reg |= TLV320AIC23_FOR_DSP; break; -- cgit v0.10.2 From e7c064889606aab3569669078c69b87b2c527e72 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 7 Mar 2009 23:48:41 -0800 Subject: xen: add FIX_TEXT_POKE to fixmap FIX_TEXT_POKE[01] are used to map kernel addresses, so they're mapping pfns, not mfns. Signed-off-by: Jeremy Fitzhardinge diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 77b242c..a96f5b9 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1812,6 +1812,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif + case FIX_TEXT_POKE0: + case FIX_TEXT_POKE1: + /* All local page mappings */ pte = pfn_pte(phys, prot); break; -- cgit v0.10.2 From 7a734e7dd93b9aea08ed51036a9a0e2c9dfd8dac Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 1 Apr 2009 18:08:28 -0700 Subject: x86, setup: "glove box" BIOS calls -- infrastructure Impact: new interfaces (not yet used) For all the platforms out there, there is an infinite number of buggy BIOSes. This adds infrastructure to treat BIOS interrupts more like toxic waste and "glove box" them -- we switch out the register set, perform the BIOS interrupt, and then restore the previous state. LKML-Reference: <49DE7F79.4030106@zytor.com> Signed-off-by: H. Peter Anvin Cc: Pavel Machek Cc: Rafael J. Wysocki diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 6633b6e..658bc52 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -26,9 +26,10 @@ targets := vmlinux.bin setup.bin setup.elf bzImage targets += fdimage fdimage144 fdimage288 image.iso mtools.conf subdir- := compressed -setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o +setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o setup-y += header.o main.o mca.o memory.o pm.o pmjump.o -setup-y += printf.o string.o tty.o video.o video-mode.o version.o +setup-y += printf.o regs.o string.o tty.o video.o video-mode.o +setup-y += version.o setup-$(CONFIG_X86_APM_BOOT) += apm.o # The link order of the video-*.o modules can matter. In particular, diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S new file mode 100644 index 0000000..22b4b3e --- /dev/null +++ b/arch/x86/boot/bioscall.S @@ -0,0 +1,82 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes + * touching memory they shouldn't be. + */ + + .code16 + .text + .globl intcall + .type intcall, @function +intcall: + /* Self-modify the INT instruction. Ugly, but works. */ + cmpb %al, 3f + je 1f + movb %al, 3f + jmp 1f /* Synchronize pipeline */ +1: + /* Save state */ + pushfl + pushw %fs + pushw %gs + pushal + + /* Copy input state to stack frame */ + subw $44, %sp + movw %dx, %si + movw %sp, %di + movw $11, %cx + rep; movsd + + /* Pop full state from the stack */ + popal + popw %gs + popw %fs + popw %es + popw %ds + popfl + + /* Actual INT */ + .byte 0xcd /* INT opcode */ +3: .byte 0 + + /* Push full state to the stack */ + pushfl + pushw %ds + pushw %es + pushw %fs + pushw %gs + pushal + + /* Re-establish C environment invariants */ + cld + movzwl %sp, %esp + movw %cs, %ax + movw %ax, %ds + movw %ax, %es + + /* Copy output state from stack frame */ + movw 68(%esp), %di /* Original %cx == 3rd argument */ + andw %di, %di + jz 4f + movw %sp, %si + movw $11, %cx + rep; movsd +4: addw $44, %sp + + /* Restore state and return */ + popal + popw %gs + popw %fs + popfl + retl + .size intcall, .-intcall diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 7b2692e..98239d2 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -26,6 +27,7 @@ #include #include "bitops.h" #include +#include /* Useful macros */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) @@ -241,6 +243,49 @@ int enable_a20(void); /* apm.c */ int query_apm_bios(void); +/* bioscall.c */ +struct biosregs { + union { + struct { + u32 edi; + u32 esi; + u32 ebp; + u32 _esp; + u32 ebx; + u32 edx; + u32 ecx; + u32 eax; + u32 _fsgs; + u32 _dses; + u32 eflags; + }; + struct { + u16 di, hdi; + u16 si, hsi; + u16 bp, hbp; + u16 _sp, _hsp; + u16 bx, hbx; + u16 dx, hdx; + u16 cx, hcx; + u16 ax, hax; + u16 gs, fs; + u16 es, ds; + u16 flags, hflags; + }; + struct { + u8 dil, dih, edi2, edi3; + u8 sil, sih, esi2, esi3; + u8 bpl, bph, ebp2, ebp3; + u8 _spl, _sph, _esp2, _esp3; + u8 bl, bh, ebx2, ebx3; + u8 dl, dh, edx2, edx3; + u8 cl, ch, ecx2, ecx3; + u8 al, ah, eax2, eax3; + }; + }; +}; +void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); + /* cmdline.c */ int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option_bool(const char *option); @@ -279,6 +324,9 @@ int sprintf(char *buf, const char *fmt, ...); int vsprintf(char *buf, const char *fmt, va_list args); int printf(const char *fmt, ...); +/* regs.c */ +void initregs(struct biosregs *regs); + /* string.c */ int strcmp(const char *str1, const char *str2); size_t strnlen(const char *s, size_t maxlen); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 5d84d1c..486d97f 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -221,7 +221,7 @@ setup_data: .quad 0 # 64-bit physical pointer to # End of setup header ##################################################### - .section ".inittext", "ax" + .section ".entrytext", "ax" start_of_setup: #ifdef SAFE_RESET_DISK_CONTROLLER # Reset the disk controller. diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c new file mode 100644 index 0000000..958019b --- /dev/null +++ b/arch/x86/boot/regs.c @@ -0,0 +1,29 @@ +/* ----------------------------------------------------------------------- + * + * Copyright 2009 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * Simple helper function for initializing a register set. + * + * Note that this sets EFLAGS_CF in the input register set; this + * makes it easier to catch functions which do nothing but don't + * explicitly set CF. + */ + +#include "boot.h" + +void initregs(struct biosregs *reg) +{ + memset(reg, 0, sizeof *reg); + reg->eflags |= X86_EFLAGS_CF; + reg->ds = ds(); + reg->es = ds(); + reg->fs = fs(); + reg->gs = gs(); +} diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index bb8dc2d..0f6ec455 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld @@ -15,8 +15,11 @@ SECTIONS . = 497; .header : { *(.header) } + .entrytext : { *(.entrytext) } .inittext : { *(.inittext) } .initdata : { *(.initdata) } + __end_init = .; + .text : { *(.text) } .text32 : { *(.text32) } @@ -52,4 +55,7 @@ SECTIONS . = ASSERT(_end <= 0x8000, "Setup too big!"); . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); + /* Necessary for the very-old-loader check to work... */ + . = ASSERT(__end_init <= 5*512, "init sections too big!"); + } diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile index 1c31cc0..167bc16 100644 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ b/arch/x86/kernel/acpi/realmode/Makefile @@ -9,7 +9,7 @@ always := wakeup.bin targets := wakeup.elf wakeup.lds -wakeup-y += wakeup.o wakemain.o video-mode.o copy.o +wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o # The link order of the video-*.o modules can matter. In particular, # video-vga.o *must* be listed first, followed by video-vesa.o. diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S new file mode 100644 index 0000000..f51eb0b --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/bioscall.S @@ -0,0 +1 @@ +#include "../../../boot/bioscall.S" diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c new file mode 100644 index 0000000..6206033 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/regs.c @@ -0,0 +1 @@ +#include "../../../boot/regs.c" -- cgit v0.10.2 From df7699c56421c0476704f24a43409ac8c505f3d2 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 1 Apr 2009 18:13:46 -0700 Subject: x86, setup: "glove box" BIOS interrupts in the core boot code Impact: BIOS proofing "Glove box" off BIOS interrupts in the core boot code. LKML-Reference: <49DE7F79.4030106@zytor.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 7c19ce8..64a31a6 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -2,7 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007-2008 rPath, Inc. - All Rights Reserved - * Copyright 2009 Intel Corporation + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -90,8 +90,11 @@ static int a20_test_long(void) static void enable_a20_bios(void) { - asm volatile("pushfl; int $0x15; popfl" - : : "a" ((u16)0x2401)); + struct biosregs ireg; + + initregs(&ireg); + ireg.ax = 0x2401; + intcall(0x15, &ireg, NULL); } static void enable_a20_kbc(void) diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 58f0415..140172b 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -61,11 +62,10 @@ static void copy_boot_params(void) */ static void keyboard_set_repeat(void) { - u16 ax = 0x0305; - u16 bx = 0; - asm volatile("int $0x16" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); + struct biosregs ireg; + initregs(&ireg); + ireg.ax = 0x0305; + intcall(0x16, &ireg, NULL); } /* @@ -73,18 +73,22 @@ static void keyboard_set_repeat(void) */ static void query_ist(void) { + struct biosregs ireg, oreg; + /* Some older BIOSes apparently crash on this call, so filter it from machines too old to have SpeedStep at all. */ if (cpu.level < 6) return; - asm("int $0x15" - : "=a" (boot_params.ist_info.signature), - "=b" (boot_params.ist_info.command), - "=c" (boot_params.ist_info.event), - "=d" (boot_params.ist_info.perf_level) - : "a" (0x0000e980), /* IST Support */ - "d" (0x47534943)); /* Request value */ + initregs(&ireg); + ireg.ax = 0xe980; /* IST Support */ + ireg.edx = 0x47534943; /* Request value */ + intcall(0x15, &ireg, &oreg); + + boot_params.ist_info.signature = oreg.eax; + boot_params.ist_info.command = oreg.ebx; + boot_params.ist_info.event = oreg.ecx; + boot_params.ist_info.perf_level = oreg.edx; } /* @@ -93,13 +97,12 @@ static void query_ist(void) static void set_bios_mode(void) { #ifdef CONFIG_X86_64 - u32 eax, ebx; + struct biosregs ireg; - eax = 0xec00; - ebx = 2; - asm volatile("int $0x15" - : "+a" (eax), "+b" (ebx) - : : "ecx", "edx", "esi", "edi"); + initregs(&ireg); + ireg.ax = 0xec00; + ireg.bx = 2; + intcall(0x15, &ireg, NULL); #endif } diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 5054c2d..d989de8 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -25,12 +25,16 @@ struct e820_ext_entry { static int detect_memory_e820(void) { int count = 0; - u32 next = 0; - u32 size, id, edi; - u8 err; + struct biosregs ireg, oreg; struct e820entry *desc = boot_params.e820_map; static struct e820_ext_entry buf; /* static so it is zeroed */ + initregs(&ireg); + ireg.ax = 0xe820; + ireg.cx = sizeof buf; + ireg.edx = SMAP; + ireg.di = (size_t)&buf; + /* * Set this here so that if the BIOS doesn't change this field * but still doesn't change %ecx, we're still okay... @@ -38,22 +42,13 @@ static int detect_memory_e820(void) buf.ext_flags = 1; do { - size = sizeof buf; - - /* Important: %edx and %esi are clobbered by some BIOSes, - so they must be either used for the error output - or explicitly marked clobbered. Given that, assume there - is something out there clobbering %ebp and %edi, too. */ - asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0" - : "=d" (err), "+b" (next), "=a" (id), "+c" (size), - "=D" (edi), "+m" (buf) - : "D" (&buf), "d" (SMAP), "a" (0xe820) - : "esi"); + intcall(0x15, &ireg, &oreg); + ireg.ebx = oreg.ebx; /* for next iteration... */ /* BIOSes which terminate the chain with CF = 1 as opposed to %ebx = 0 don't always report the SMAP signature on the final, failing, probe. */ - if (err) + if (oreg.eflags & X86_EFLAGS_CF) break; /* Some BIOSes stop returning SMAP in the middle of @@ -61,7 +56,7 @@ static int detect_memory_e820(void) screwed up the map at that point, we might have a partial map, the full map, or complete garbage, so just return failure. */ - if (id != SMAP) { + if (oreg.eax != SMAP) { count = 0; break; } @@ -69,58 +64,62 @@ static int detect_memory_e820(void) /* ACPI 3.0 added the extended flags support. If bit 0 in the extended flags is zero, we're supposed to simply ignore the entry -- a backwards incompatible change! */ - if (size > 20 && !(buf.ext_flags & 1)) + if (oreg.cx > 20 && !(buf.ext_flags & 1)) continue; *desc++ = buf.std; count++; - } while (next && count < ARRAY_SIZE(boot_params.e820_map)); + } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map)); return boot_params.e820_entries = count; } static int detect_memory_e801(void) { - u16 ax, bx, cx, dx; - u8 err; + struct biosregs ireg, oreg; - bx = cx = dx = 0; - ax = 0xe801; - asm("stc; int $0x15; setc %0" - : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx)); + initregs(&ireg); + ireg.ax = 0xe801; + intcall(0x15, &ireg, &oreg); - if (err) + if (oreg.eflags & X86_EFLAGS_CF) return -1; /* Do we really need to do this? */ - if (cx || dx) { - ax = cx; - bx = dx; + if (oreg.cx || oreg.dx) { + oreg.ax = oreg.cx; + oreg.bx = oreg.dx; } - if (ax > 15*1024) + if (oreg.ax > 15*1024) { return -1; /* Bogus! */ - - /* This ignores memory above 16MB if we have a memory hole - there. If someone actually finds a machine with a memory - hole at 16MB and no support for 0E820h they should probably - generate a fake e820 map. */ - boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax; + } else if (oreg.ax == 15*1024) { + boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax; + } else { + /* + * This ignores memory above 16MB if we have a memory + * hole there. If someone actually finds a machine + * with a memory hole at 16MB and no support for + * 0E820h they should probably generate a fake e820 + * map. + */ + boot_params.alt_mem_k = oreg.ax; + } return 0; } static int detect_memory_88(void) { - u16 ax; - u8 err; + struct biosregs ireg, oreg; - ax = 0x8800; - asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); + initregs(&ireg); + ireg.ah = 0x88; + intcall(0x15, &ireg, &oreg); - boot_params.screen_info.ext_mem_k = ax; + boot_params.screen_info.ext_mem_k = oreg.ax; - return -err; + return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ } int detect_memory(void) diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 7e8e8b2..01ec69c 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -22,24 +23,23 @@ void __attribute__((section(".inittext"))) putchar(int ch) { - unsigned char c = ch; + struct biosregs ireg; - if (c == '\n') + if (ch == '\n') putchar('\r'); /* \n -> \r\n */ - /* int $0x10 is known to have bugs involving touching registers - it shouldn't. Be extra conservative... */ - asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal" - : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch)); + initregs(&ireg); + ireg.bx = 0x0007; + ireg.cx = 0x0001; + ireg.ah = 0x0e; + ireg.al = ch; + intcall(0x10, &ireg, NULL); } void __attribute__((section(".inittext"))) puts(const char *str) { - int n = 0; - while (*str) { + while (*str) putchar(*str++); - n++; - } } /* @@ -49,14 +49,13 @@ void __attribute__((section(".inittext"))) puts(const char *str) static u8 gettime(void) { - u16 ax = 0x0200; - u16 cx, dx; + struct biosregs ireg, oreg; - asm volatile("int $0x1a" - : "+a" (ax), "=c" (cx), "=d" (dx) - : : "ebx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x02; + intcall(0x1a, &ireg, &oreg); - return dx >> 8; + return oreg.dh; } /* @@ -64,19 +63,24 @@ static u8 gettime(void) */ int getchar(void) { - u16 ax = 0; - asm volatile("int $0x16" : "+a" (ax)); + struct biosregs ireg, oreg; + + initregs(&ireg); + /* ireg.ah = 0x00; */ + intcall(0x16, &ireg, &oreg); - return ax & 0xff; + return oreg.al; } static int kbd_pending(void) { - u8 pending; - asm volatile("int $0x16; setnz %0" - : "=qm" (pending) - : "a" (0x0100)); - return pending; + struct biosregs ireg, oreg; + + initregs(&ireg); + ireg.ah = 0x01; + intcall(0x16, &ireg, &oreg); + + return !(oreg.eflags & X86_EFLAGS_ZF); } void kbd_flush(void) -- cgit v0.10.2 From d54ea252e4c92357226992cf65d94616a96e6fce Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 1 Apr 2009 18:14:26 -0700 Subject: x86, setup: "glove box" BIOS interrupts in the APM code Impact: BIOS proofing "Glove box" off BIOS interrupts in the APM code. LKML-Reference: <49DE7F79.4030106@zytor.com> Signed-off-by: H. Peter Anvin Cc: Stephen Rothwell diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c index 7aa6033..ee27483 100644 --- a/arch/x86/boot/apm.c +++ b/arch/x86/boot/apm.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * Original APM BIOS checking by Stephen Rothwell, May 1994 * (sfr@canb.auug.org.au) @@ -19,75 +20,56 @@ int query_apm_bios(void) { - u16 ax, bx, cx, dx, di; - u32 ebx, esi; - u8 err; + struct biosregs ireg, oreg; /* APM BIOS installation check */ - ax = 0x5300; - bx = cx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" - : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) - : : "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x53; + intcall(0x15, &ireg, &oreg); - if (err) + if (oreg.flags & X86_EFLAGS_CF) return -1; /* No APM BIOS */ - if (bx != 0x504d) /* "PM" signature */ + if (oreg.bx != 0x504d) /* "PM" signature */ return -1; - if (!(cx & 0x02)) /* 32 bits supported? */ + if (!(oreg.cx & 0x02)) /* 32 bits supported? */ return -1; /* Disconnect first, just in case */ - ax = 0x5304; - bx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); - - /* Paranoia */ - ebx = esi = 0; - cx = dx = di = 0; + ireg.al = 0x04; + intcall(0x15, &ireg, NULL); /* 32-bit connect */ - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6" - : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx), - "+S" (esi), "+D" (di), "=m" (err) - : "a" (0x5303)); - - boot_params.apm_bios_info.cseg = ax; - boot_params.apm_bios_info.offset = ebx; - boot_params.apm_bios_info.cseg_16 = cx; - boot_params.apm_bios_info.dseg = dx; - boot_params.apm_bios_info.cseg_len = (u16)esi; - boot_params.apm_bios_info.cseg_16_len = esi >> 16; - boot_params.apm_bios_info.dseg_len = di; - - if (err) + ireg.al = 0x03; + intcall(0x15, &ireg, &oreg); + + boot_params.apm_bios_info.cseg = oreg.ax; + boot_params.apm_bios_info.offset = oreg.ebx; + boot_params.apm_bios_info.cseg_16 = oreg.cx; + boot_params.apm_bios_info.dseg = oreg.dx; + boot_params.apm_bios_info.cseg_len = oreg.si; + boot_params.apm_bios_info.cseg_16_len = oreg.hsi; + boot_params.apm_bios_info.dseg_len = oreg.di; + + if (oreg.flags & X86_EFLAGS_CF) return -1; /* Redo the installation check as the 32-bit connect; some BIOSes return different flags this way... */ - ax = 0x5300; - bx = cx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" - : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) - : : "esi", "edi"); + ireg.al = 0x00; + intcall(0x15, &ireg, &oreg); - if (err || bx != 0x504d) { + if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) { /* Failure with 32-bit connect, try to disconect and ignore */ - ax = 0x5304; - bx = 0; - asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); + ireg.al = 0x04; + intcall(0x15, &ireg, NULL); return -1; } - boot_params.apm_bios_info.version = ax; - boot_params.apm_bios_info.flags = cx; + boot_params.apm_bios_info.version = oreg.ax; + boot_params.apm_bios_info.flags = oreg.cx; return 0; } -- cgit v0.10.2 From 3435d3476c5ed955d56a6216ed2d156847b3a575 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 1 Apr 2009 18:17:17 -0700 Subject: x86, setup: "glove box" BIOS interrupts in the EDD code Impact: BIOS proofing "Glove box" off BIOS interrupts in the EDD code. LKML-Reference: <49DE7F79.4030106@zytor.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 1aae8f3..c501a5b 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -22,17 +23,17 @@ */ static int read_mbr(u8 devno, void *buf) { - u16 ax, bx, cx, dx; + struct biosregs ireg, oreg; - ax = 0x0201; /* Legacy Read, one sector */ - cx = 0x0001; /* Sector 0-0-1 */ - dx = devno; - bx = (size_t)buf; - asm volatile("pushfl; stc; int $0x13; setc %%al; popfl" - : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx) - : : "esi", "edi", "memory"); + initregs(&ireg); + ireg.ax = 0x0201; /* Legacy Read, one sector */ + ireg.cx = 0x0001; /* Sector 0-0-1 */ + ireg.dl = devno; + ireg.bx = (size_t)buf; - return -(u8)ax; /* 0 or -1 */ + intcall(0x13, &ireg, &oreg); + + return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ } static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) @@ -72,56 +73,46 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) static int get_edd_info(u8 devno, struct edd_info *ei) { - u16 ax, bx, cx, dx, di; + struct biosregs ireg, oreg; memset(ei, 0, sizeof *ei); /* Check Extensions Present */ - ax = 0x4100; - bx = EDDMAGIC1; - dx = devno; - asm("pushfl; stc; int $0x13; setc %%al; popfl" - : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx) - : : "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x41; + ireg.bx = EDDMAGIC1; + ireg.dl = devno; + intcall(0x13, &ireg, &oreg); - if ((u8)ax) + if (oreg.eflags & X86_EFLAGS_CF) return -1; /* No extended information */ - if (bx != EDDMAGIC2) + if (oreg.bx != EDDMAGIC2) return -1; ei->device = devno; - ei->version = ax >> 8; /* EDD version number */ - ei->interface_support = cx; /* EDD functionality subsets */ + ei->version = oreg.ah; /* EDD version number */ + ei->interface_support = oreg.cx; /* EDD functionality subsets */ /* Extended Get Device Parameters */ ei->params.length = sizeof(ei->params); - ax = 0x4800; - dx = devno; - asm("pushfl; int $0x13; popfl" - : "+a" (ax), "+d" (dx), "=m" (ei->params) - : "S" (&ei->params) - : "ebx", "ecx", "edi"); + ireg.ah = 0x48; + ireg.si = (size_t)&ei->params; + intcall(0x13, &ireg, &oreg); /* Get legacy CHS parameters */ /* Ralf Brown recommends setting ES:DI to 0:0 */ - ax = 0x0800; - dx = devno; - di = 0; - asm("pushw %%es; " - "movw %%di,%%es; " - "pushfl; stc; int $0x13; setc %%al; popfl; " - "popw %%es" - : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di) - : : "esi"); - - if ((u8)ax == 0) { - ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2); - ei->legacy_max_head = dx >> 8; - ei->legacy_sectors_per_track = cx & 0x3f; + ireg.ah = 0x08; + ireg.es = 0; + intcall(0x13, &ireg, &oreg); + + if (!(oreg.eflags & X86_EFLAGS_CF)) { + ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2); + ei->legacy_max_head = oreg.dh; + ei->legacy_sectors_per_track = oreg.cl & 0x3f; } return 0; -- cgit v0.10.2 From 0a706db320768f8f6e43bbf73b58d2aabdc93354 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 1 Apr 2009 18:19:00 -0700 Subject: x86, setup: "glove box" BIOS interrupts in the MCA code Impact: BIOS proofing "Glove box" off BIOS interrupts in the MCA code. LKML-Reference: <49DE7F79.4030106@zytor.com> Signed-off-by: H. Peter Anvin Cc: James Bottomley diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c index 911eaae..a95a531 100644 --- a/arch/x86/boot/mca.c +++ b/arch/x86/boot/mca.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -16,26 +17,22 @@ int query_mca(void) { - u8 err; - u16 es, bx, len; - - asm("pushw %%es ; " - "int $0x15 ; " - "setc %0 ; " - "movw %%es, %1 ; " - "popw %%es" - : "=acd" (err), "=acdSD" (es), "=b" (bx) - : "a" (0xc000)); - - if (err) + struct biosregs ireg, oreg; + u16 len; + + initregs(&ireg); + ireg.ah = 0xc0; + intcall(0x15, &ireg, &oreg); + + if (oreg.eflags & X86_EFLAGS_CF) return -1; /* No MCA present */ - set_fs(es); - len = rdfs16(bx); + set_fs(oreg.es); + len = rdfs16(oreg.bx); if (len > sizeof(boot_params.sys_desc_table)) len = sizeof(boot_params.sys_desc_table); - copy_from_fs(&boot_params.sys_desc_table, bx, len); + copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len); return 0; } -- cgit v0.10.2 From cf06de7b9cdd3efee7a59dced1977b3c21d43732 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 1 Apr 2009 18:20:11 -0700 Subject: x86, setup: "glove box" BIOS interrupts in the video code Impact: BIOS proofing "Glove box" off BIOS interrupts in the video code. LKML-Reference: <49DE7F79.4030106@zytor.com> Signed-off-by: H. Peter Anvin Cc: Pavel Machek Cc: Rafael J. Wysocki diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 3fa979c..d660be4 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -29,21 +30,21 @@ static int bios_set_mode(struct mode_info *mi) static int set_bios_mode(u8 mode) { - u16 ax; + struct biosregs ireg, oreg; u8 new_mode; - ax = mode; /* AH=0x00 Set Video Mode */ - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + initregs(&ireg); + ireg.al = mode; /* AH=0x00 Set Video Mode */ + intcall(0x10, &ireg, NULL); - ax = 0x0f00; /* Get Current Video Mode */ - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + + ireg.ah = 0x0f; /* Get Current Video Mode */ + intcall(0x10, &ireg, &oreg); do_restore = 1; /* Assume video contents were lost */ - new_mode = ax & 0x7f; /* Not all BIOSes are clean with the top bit */ + + /* Not all BIOSes are clean with the top bit */ + new_mode = ireg.al & 0x7f; if (new_mode == mode) return 0; /* Mode change OK */ @@ -53,10 +54,8 @@ static int set_bios_mode(u8 mode) /* Mode setting failed, but we didn't end up where we started. That's bad. Try to revert to the original video mode. */ - ax = boot_params.screen_info.orig_video_mode; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + ireg.ax = boot_params.screen_info.orig_video_mode; + intcall(0x10, &ireg, NULL); } #endif return -1; diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 4a58c8c..c700147 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -31,7 +32,7 @@ static inline void vesa_store_mode_params_graphics(void) {} static int vesa_probe(void) { #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) - u16 ax, cx, di; + struct biosregs ireg, oreg; u16 mode; addr_t mode_ptr; struct mode_info *mi; @@ -39,13 +40,12 @@ static int vesa_probe(void) video_vesa.modes = GET_HEAP(struct mode_info, 0); - ax = 0x4f00; - di = (size_t)&vginfo; - asm(INT10 - : "+a" (ax), "+D" (di), "=m" (vginfo) - : : "ebx", "ecx", "edx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f00; + ireg.di = (size_t)&vginfo; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f || + if (ireg.ax != 0x004f || vginfo.signature != VESA_MAGIC || vginfo.version < 0x0102) return 0; /* Not present */ @@ -65,14 +65,12 @@ static int vesa_probe(void) memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ - ax = 0x4f01; - cx = mode; - di = (size_t)&vminfo; - asm(INT10 - : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) - : : "ebx", "edx", "esi"); + ireg.ax = 0x4f01; + ireg.cx = mode; + ireg.di = (size_t)&vminfo; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (ireg.ax != 0x004f) continue; if ((vminfo.mode_attr & 0x15) == 0x05) { @@ -111,20 +109,19 @@ static int vesa_probe(void) static int vesa_set_mode(struct mode_info *mode) { - u16 ax, bx, cx, di; + struct biosregs ireg, oreg; int is_graphic; u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ - ax = 0x4f01; - cx = vesa_mode; - di = (size_t)&vminfo; - asm(INT10 - : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) - : : "ebx", "edx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f01; + ireg.cx = vesa_mode; + ireg.di = (size_t)&vminfo; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return -1; if ((vminfo.mode_attr & 0x15) == 0x05) { @@ -141,14 +138,12 @@ static int vesa_set_mode(struct mode_info *mode) } - ax = 0x4f02; - bx = vesa_mode; - di = 0; - asm volatile(INT10 - : "+a" (ax), "+b" (bx), "+D" (di) - : : "ecx", "edx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f02; + ireg.bx = vesa_mode; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return -1; graphic_mode = is_graphic; @@ -171,50 +166,45 @@ static int vesa_set_mode(struct mode_info *mode) /* Switch DAC to 8-bit mode */ static void vesa_dac_set_8bits(void) { + struct biosregs ireg, oreg; u8 dac_size = 6; /* If possible, switch the DAC to 8-bit mode */ if (vginfo.capabilities & 1) { - u16 ax, bx; - - ax = 0x4f08; - bx = 0x0800; - asm volatile(INT10 - : "+a" (ax), "+b" (bx) - : : "ecx", "edx", "esi", "edi"); - - if (ax == 0x004f) - dac_size = bx >> 8; + initregs(&ireg); + ireg.ax = 0x4f08; + ireg.bh = 0x08; + intcall(0x10, &ireg, &oreg); + if (oreg.ax == 0x004f) + dac_size = oreg.bh; } /* Set the color sizes to the DAC size, and offsets to 0 */ - boot_params.screen_info.red_size = dac_size; + boot_params.screen_info.red_size = dac_size; boot_params.screen_info.green_size = dac_size; - boot_params.screen_info.blue_size = dac_size; - boot_params.screen_info.rsvd_size = dac_size; + boot_params.screen_info.blue_size = dac_size; + boot_params.screen_info.rsvd_size = dac_size; - boot_params.screen_info.red_pos = 0; - boot_params.screen_info.green_pos = 0; - boot_params.screen_info.blue_pos = 0; - boot_params.screen_info.rsvd_pos = 0; + boot_params.screen_info.red_pos = 0; + boot_params.screen_info.green_pos = 0; + boot_params.screen_info.blue_pos = 0; + boot_params.screen_info.rsvd_pos = 0; } /* Save the VESA protected mode info */ static void vesa_store_pm_info(void) { - u16 ax, bx, di, es; + struct biosregs ireg, oreg; - ax = 0x4f0a; - bx = di = 0; - asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es" - : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di) - : : "ecx", "esi"); + initregs(&ireg); + ireg.ax = 0x4f0a; + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return; - boot_params.screen_info.vesapm_seg = es; - boot_params.screen_info.vesapm_off = di; + boot_params.screen_info.vesapm_seg = oreg.es; + boot_params.screen_info.vesapm_off = oreg.di; } /* @@ -252,7 +242,7 @@ static void vesa_store_mode_params_graphics(void) void vesa_store_edid(void) { #ifdef CONFIG_FIRMWARE_EDID - u16 ax, bx, cx, dx, di; + struct biosregs ireg, oreg; /* Apparently used as a nonsense token... */ memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); @@ -260,33 +250,26 @@ void vesa_store_edid(void) if (vginfo.version < 0x0200) return; /* EDID requires VBE 2.0+ */ - ax = 0x4f15; /* VBE DDC */ - bx = 0x0000; /* Report DDC capabilities */ - cx = 0; /* Controller 0 */ - di = 0; /* ES:DI must be 0 by spec */ - - /* Note: The VBE DDC spec is different from the main VESA spec; - we genuinely have to assume all registers are destroyed here. */ - - asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" - : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) - : : "esi", "edx"); + initregs(&ireg); + ireg.ax = 0x4f15; /* VBE DDC */ + /* ireg.bx = 0x0000; */ /* Report DDC capabilities */ + /* ireg.cx = 0; */ /* Controller 0 */ + ireg.es = 0; /* ES:DI must be 0 by spec */ + intcall(0x10, &ireg, &oreg); - if (ax != 0x004f) + if (oreg.ax != 0x004f) return; /* No EDID */ /* BH = time in seconds to transfer EDD information */ /* BL = DDC level supported */ - ax = 0x4f15; /* VBE DDC */ - bx = 0x0001; /* Read EDID */ - cx = 0; /* Controller 0 */ - dx = 0; /* EDID block number */ - di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ - asm(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info), - "+c" (cx), "+D" (di) - : : "esi"); + ireg.ax = 0x4f15; /* VBE DDC */ + ireg.bx = 0x0001; /* Read EDID */ + /* ireg.cx = 0; */ /* Controller 0 */ + /* ireg.dx = 0; */ /* EDID block number */ + ireg.es = ds(); + ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */ + intcall(0x10, &ireg, &oreg); #endif /* CONFIG_FIRMWARE_EDID */ } diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 9e0587a..8f8d827 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -39,30 +40,30 @@ static __videocard video_vga; /* Set basic 80x25 mode */ static u8 vga_set_basic_mode(void) { + struct biosregs ireg, oreg; u16 ax; u8 rows; u8 mode; + initregs(&ireg); + #ifdef CONFIG_VIDEO_400_HACK if (adapter >= ADAPTER_VGA) { - asm volatile(INT10 - : : "a" (0x1202), "b" (0x0030) - : "ecx", "edx", "esi", "edi"); + ireg.ax = 0x1202; + ireg.bx = 0x0030; + intcall(0x10, &ireg, NULL); } #endif ax = 0x0f00; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); - - mode = (u8)ax; + intcall(0x10, &ireg, &oreg); + mode = oreg.al; set_fs(0); rows = rdfs8(0x484); /* rows minus one */ #ifndef CONFIG_VIDEO_400_HACK - if ((ax == 0x5003 || ax == 0x5007) && + if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) && (rows == 0 || rows == 24)) return mode; #endif @@ -71,10 +72,8 @@ static u8 vga_set_basic_mode(void) mode = 3; /* Set the mode */ - ax = mode; - asm volatile(INT10 - : "+a" (ax) - : : "ebx", "ecx", "edx", "esi", "edi"); + ireg.ax = mode; /* AH=0: set mode */ + intcall(0x10, &ireg, NULL); do_restore = 1; return mode; } @@ -82,43 +81,69 @@ static u8 vga_set_basic_mode(void) static void vga_set_8font(void) { /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ + struct biosregs ireg; + + initregs(&ireg); /* Set 8x8 font */ - asm volatile(INT10 : : "a" (0x1112), "b" (0)); + ireg.ax = 0x1112; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); /* Use alternate print screen */ - asm volatile(INT10 : : "a" (0x1200), "b" (0x20)); + ireg.ax = 0x1200; + ireg.bl = 0x20; + intcall(0x10, &ireg, NULL); /* Turn off cursor emulation */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); /* Cursor is scan lines 6-7 */ - asm volatile(INT10 : : "a" (0x0100), "c" (0x0607)); + ireg.ax = 0x0100; + ireg.cx = 0x0607; + intcall(0x10, &ireg, NULL); } static void vga_set_14font(void) { /* Set 9x14 font - 80x28 on VGA */ + struct biosregs ireg; + + initregs(&ireg); /* Set 9x14 font */ - asm volatile(INT10 : : "a" (0x1111), "b" (0)); + ireg.ax = 0x1111; + /* ireg.bl = 0; */ + intcall(0x10, &ireg, NULL); /* Turn off cursor emulation */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); + ireg.ax = 0x1201; + ireg.bl = 0x34; + intcall(0x10, &ireg, NULL); /* Cursor is scan lines 11-12 */ - asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c)); + ireg.ax = 0x0100; + ireg.cx = 0x0b0c; + intcall(0x10, &ireg, NULL); } static void vga_set_80x43(void) { /* Set 80x43 mode on VGA (not EGA) */ + struct biosregs ireg; + + initregs(&ireg); /* Set 350 scans */ - asm volatile(INT10 : : "a" (0x1201), "b" (0x30)); + ireg.ax = 0x1201; + ireg.bl = 0x30; + intcall(0x10, &ireg, NULL); /* Reset video mode */ - asm volatile(INT10 : : "a" (0x0003)); + ireg.ax = 0x0003; + intcall(0x10, &ireg, NULL); vga_set_8font(); } @@ -225,8 +250,6 @@ static int vga_set_mode(struct mode_info *mode) */ static int vga_probe(void) { - u16 ega_bx; - static const char *card_name[] = { "CGA/MDA/HGC", "EGA", "VGA" }; @@ -240,26 +263,26 @@ static int vga_probe(void) sizeof(ega_modes)/sizeof(struct mode_info), sizeof(vga_modes)/sizeof(struct mode_info), }; - u8 vga_flag; - asm(INT10 - : "=b" (ega_bx) - : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */ - : "ecx", "edx", "esi", "edi"); + struct biosregs ireg, oreg; + + initregs(&ireg); + + ireg.ax = 0x1200; + ireg.bl = 0x10; /* Check EGA/VGA */ + intcall(0x10, &ireg, &oreg); #ifndef _WAKEUP - boot_params.screen_info.orig_video_ega_bx = ega_bx; + boot_params.screen_info.orig_video_ega_bx = oreg.bx; #endif /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ - if ((u8)ega_bx != 0x10) { + if (oreg.bl != 0x10) { /* EGA/VGA */ - asm(INT10 - : "=a" (vga_flag) - : "a" (0x1a00) - : "ebx", "ecx", "edx", "esi", "edi"); + ireg.ax = 0x1a00; + intcall(0x10, &ireg, &oreg); - if (vga_flag == 0x1a) { + if (oreg.al == 0x1a) { adapter = ADAPTER_VGA; #ifndef _WAKEUP boot_params.screen_info.orig_video_isVGA = 1; diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 3bef2c1..bad728b 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -2,6 +2,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright 2007 rPath, Inc. - All Rights Reserved + * Copyright 2009 Intel Corporation; author H. Peter Anvin * * This file is part of the Linux kernel, and is made available under * the terms of the GNU General Public License version 2. @@ -18,33 +19,29 @@ static void store_cursor_position(void) { - u16 curpos; - u16 ax, bx; + struct biosregs ireg, oreg; - ax = 0x0300; - bx = 0; - asm(INT10 - : "=d" (curpos), "+a" (ax), "+b" (bx) - : : "ecx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x03; + intcall(0x10, &ireg, &oreg); - boot_params.screen_info.orig_x = curpos; - boot_params.screen_info.orig_y = curpos >> 8; + boot_params.screen_info.orig_x = oreg.dl; + boot_params.screen_info.orig_y = oreg.dh; } static void store_video_mode(void) { - u16 ax, page; + struct biosregs ireg, oreg; /* N.B.: the saving of the video page here is a bit silly, since we pretty much assume page 0 everywhere. */ - ax = 0x0f00; - asm(INT10 - : "+a" (ax), "=b" (page) - : : "ecx", "edx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x0f; + intcall(0x10, &ireg, &oreg); /* Not all BIOSes are clean with respect to the top bit */ - boot_params.screen_info.orig_video_mode = ax & 0x7f; - boot_params.screen_info.orig_video_page = page >> 8; + boot_params.screen_info.orig_video_mode = oreg.al & 0x7f; + boot_params.screen_info.orig_video_page = oreg.bh; } /* @@ -257,7 +254,7 @@ static void restore_screen(void) int y; addr_t dst = 0; u16 *src = saved.data; - u16 ax, bx, dx; + struct biosregs ireg; if (graphic_mode) return; /* Can't restore onto a graphic mode */ @@ -296,12 +293,11 @@ static void restore_screen(void) } /* Restore cursor position */ - ax = 0x0200; /* Set cursor position */ - bx = 0; /* Page number (<< 8) */ - dx = (saved.cury << 8)+saved.curx; - asm volatile(INT10 - : "+a" (ax), "+b" (bx), "+d" (dx) - : : "ecx", "esi", "edi"); + initregs(&ireg); + ireg.ah = 0x02; /* Set cursor position */ + ireg.dh = saved.cury; + ireg.dl = saved.curx; + intcall(0x10, &ireg, NULL); } #else #define save_screen() ((void)0) diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index ee63f5d..5bb174a 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -112,20 +112,6 @@ extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ extern int do_restore; /* Restore screen contents */ extern int graphic_mode; /* Graphics mode with linear frame buffer */ -/* - * int $0x10 is notorious for touching registers it shouldn't. - * gcc doesn't like %ebp being clobbered, so define it as a push/pop - * sequence here. - * - * A number of systems, including the original PC can clobber %bp in - * certain circumstances, like when scrolling. There exists at least - * one Trident video card which could clobber DS under a set of - * circumstances that we are unlikely to encounter (scrolling when - * using an extended graphics mode of more than 800x600 pixels), but - * it's cheap insurance to deal with that here. - */ -#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp" - /* Accessing VGA indexed registers */ static inline u8 in_idx(u16 port, u8 index) { -- cgit v0.10.2 From 2062501ae6505dbc5bff3a792246c2661d114050 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Apr 2009 01:49:33 +0200 Subject: tracing/lockdep: report the time waited for a lock While trying to optimize the new lock on reiserfs to replace the bkl, I find the lock tracing very useful though it lacks something important for performance (and latency) instrumentation: the time a task waits for a lock. That's what this patch implements: bash-4816 [000] 202.652815: lock_contended: lock_contended: &sb->s_type->i_mutex_key bash-4816 [000] 202.652819: lock_acquired: &rq->lock (0.000 us) <...>-4787 [000] 202.652825: lock_acquired: &rq->lock (0.000 us) <...>-4787 [000] 202.652829: lock_acquired: &rq->lock (0.000 us) bash-4816 [000] 202.652833: lock_acquired: &sb->s_type->i_mutex_key (16.005 us) As shown above, the "lock acquired" field is followed by the time it has been waiting for the lock. Usually, a lock contended entry is followed by a near lock_acquired entry with a non-zero time waited. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <1238975373-15739-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h index adccfcd..863f1e4 100644 --- a/include/trace/lockdep_event_types.h +++ b/include/trace/lockdep_event_types.h @@ -32,11 +32,24 @@ TRACE_FORMAT(lock_contended, TP_FMT("%s", lock->name) ); -TRACE_FORMAT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); #endif #endif diff --git a/kernel/lockdep.c b/kernel/lockdep.c index b0f0118..c4582a6 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3061,6 +3061,8 @@ found_it: put_lock_stats(stats); } +DEFINE_TRACE(lock_acquired); + static void __lock_acquired(struct lockdep_map *lock, unsigned long ip) { @@ -3099,6 +3101,8 @@ found_it: hlock->holdtime_stamp = now; } + trace_lock_acquired(lock, ip, waittime); + stats = get_lock_stats(hlock_class(hlock)); if (waittime) { if (hlock->read) @@ -3137,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -DEFINE_TRACE(lock_acquired); - void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; - trace_lock_acquired(lock, ip); - if (unlikely(!lock_stat)) return; -- cgit v0.10.2 From e71e99c294058a61b7a8b9bb6da2f745ac51aa4f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Mar 2009 14:30:04 -0400 Subject: x86, function-graph: only save return values on x86_64 Impact: speed up The return to handler portion of the function graph tracer should only need to save the return values. The caller already saved off the registers that the callee can modify. The returning function already saved the registers it modified. When we call our own trace function it too will save the registers that the callee must restore. There's no reason to save off anything more that the registers used to return the values. Note, I did a complete kernel build with this modification and the function graph tracer running on x86_64. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index a331ec3..1ac9986 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -147,27 +147,14 @@ END(ftrace_graph_caller) GLOBAL(return_to_handler) subq $80, %rsp + /* Save the return values */ movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - movq %r10, 56(%rsp) - movq %r11, 64(%rsp) + movq %rdx, 8(%rsp) call ftrace_return_to_handler movq %rax, 72(%rsp) - movq 64(%rsp), %r11 - movq 56(%rsp), %r10 - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx + movq 8(%rsp), %rdx movq (%rsp), %rax addq $72, %rsp retq -- cgit v0.10.2 From 5cb3d1d9d34ac04bcaa2034139345b2a5fea54c1 Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Thu, 9 Apr 2009 14:08:18 +0800 Subject: tracing, net, skb tracepoint: make skb tracepoint use the TRACE_EVENT() macro TRACE_EVENT is a more generic way to define a tracepoint. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Signed-off-by: Zhao Lei Acked-by: Neil Horman Cc: "David S. Miller" Cc: Arnaldo Carvalho de Melo Cc: "Steven Rostedt ;" Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DD90D2.5020604@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/skb.h b/include/trace/skb.h index b66206d..d2de717 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -4,8 +4,6 @@ #include #include -DECLARE_TRACE(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), - TP_ARGS(skb, location)); +#include #endif diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h new file mode 100644 index 0000000..4a1c504 --- /dev/null +++ b/include/trace/skb_event_types.h @@ -0,0 +1,38 @@ + +/* use instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h index df56f56..33b6bfc 100644 --- a/include/trace/trace_event_types.h +++ b/include/trace/trace_event_types.h @@ -3,3 +3,4 @@ #include #include #include +#include diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index fd13750..0e2aa80 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -3,3 +3,4 @@ #include #include #include +#include -- cgit v0.10.2 From bda869c614c937c318547c3ee1d65a316b693c21 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 9 Apr 2009 15:05:10 +0200 Subject: x86: cacheinfo: use L3 cache index disable feature only for CPUs that support it AMD family 0x11 CPU doesn't support the feature. Some AMD family 0x10 CPUs do not support it or have an erratum, see erratum #382 in "Revision Guide for AMD Family 10h Processors, 41322 Rev. 3.40 February 2009". Signed-off-by: Andreas Herrmann CC: Mark Langsdorf Cc: Andrew Morton LKML-Reference: <20090409130510.GG31527@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 483eda9..7240126 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -291,6 +291,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) { if (index < 3) return; + + if (boot_cpu_data.x86 == 0x11) + return; + + /* see erratum #382 */ + if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) + return; + this_leaf->can_disable = 1; } -- cgit v0.10.2 From 845d8c761ec763871936c62b837c4a9ea6d0fbdb Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 9 Apr 2009 15:07:29 +0200 Subject: x86: cacheinfo: correct return value when cache_disable feature is not active Impact: bug fix If user writes to "cache_disable" attribute on a CPU that does not support this feature, the process hangs due to an invalid return value in store_cache_disable(). Signed-off-by: Andreas Herrmann Cc: Andrew Morton Cc: Mark Langsdorf LKML-Reference: <20090409130729.GH31527@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 7240126..1ab46e0 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -771,7 +771,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, unsigned int ret, index, val; if (!this_leaf->can_disable) - return 0; + return -EINVAL; if (strlen(buf) > 15) return -EINVAL; -- cgit v0.10.2 From afd9fceec55225d33be878927056a548c2eef26c Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 9 Apr 2009 15:16:17 +0200 Subject: x86: cacheinfo: use cached K8 NB_MISC devices instead of scanning for it Impact: avoid code duplication Signed-off-by: Andreas Herrmann Cc: Andrew Morton Cc: Mark Langsdorf LKML-Reference: <20090409131617.GI31527@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index 54c8cc5..c23b3d1 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -12,4 +12,12 @@ extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); extern int k8_scan_nodes(unsigned long start, unsigned long end); +#ifdef CONFIG_K8_NB +#define node_to_k8_nb_misc(node) \ + (node < num_k8_northbridges) ? k8_northbridges[node] : NULL +#else +#define node_to_k8_nb_misc(node) NULL +#endif + + #endif /* _ASM_X86_K8_H */ diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1ab46e0..0cde071 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -17,6 +17,7 @@ #include #include +#include #define LVL_1_INST 1 #define LVL_1_DATA 2 @@ -159,14 +160,6 @@ struct _cpuid4_info_regs { unsigned long can_disable; }; -#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS) -static struct pci_device_id k8_nb_id[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, - {} -}; -#endif - unsigned short num_cache_leaves; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -704,30 +697,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) -#ifdef CONFIG_PCI -static struct pci_dev *get_k8_northbridge(int node) -{ - struct pci_dev *dev = NULL; - int i; - - for (i = 0; i <= node; i++) { - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (!dev) - break; - } while (!pci_match_id(&k8_nb_id[0], dev)); - if (!dev) - break; - } - return dev; -} -#else -static struct pci_dev *get_k8_northbridge(int node) -{ - return NULL; -} -#endif - static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) { const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); @@ -739,7 +708,7 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) if (!this_leaf->can_disable) return sprintf(buf, "Feature not enabled\n"); - dev = get_k8_northbridge(node); + dev = node_to_k8_nb_misc(node); if (!dev) { printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; @@ -783,7 +752,7 @@ store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, return -EINVAL; val |= 0xc0000000; - dev = get_k8_northbridge(node); + dev = node_to_k8_nb_misc(node); if (!dev) { printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; -- cgit v0.10.2 From f8b201fc7110c3673437254e8ba02451461ece0b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 9 Apr 2009 15:18:49 +0200 Subject: x86: cacheinfo: replace sysfs interface for cache_disable feature Impact: replace sysfs attribute Current interface violates against "one-value-per-sysfs-attribute rule". This patch replaces current attribute with two attributes -- one for each L3 Cache Index Disable register. Signed-off-by: Mark Langsdorf Signed-off-by: Andreas Herrmann Cc: Andrew Morton LKML-Reference: <20090409131849.GJ31527@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0cde071..fc28291 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -697,73 +697,69 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) #define to_object(k) container_of(k, struct _index_kobject, kobj) #define to_attr(a) container_of(a, struct _cache_attr, attr) -static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) +static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, + unsigned int index) { - const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); - int node = cpu_to_node(cpumask_first(mask)); - struct pci_dev *dev = NULL; - ssize_t ret = 0; - int i; + int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); + int node = cpu_to_node(cpu); + struct pci_dev *dev = node_to_k8_nb_misc(node); + unsigned int reg = 0; if (!this_leaf->can_disable) - return sprintf(buf, "Feature not enabled\n"); - - dev = node_to_k8_nb_misc(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); return -EINVAL; - } - for (i = 0; i < 2; i++) { - unsigned int reg; + if (!dev) + return -EINVAL; - pci_read_config_dword(dev, 0x1BC + i * 4, ®); + pci_read_config_dword(dev, 0x1BC + index * 4, ®); + return sprintf(buf, "%x\n", reg); +} - ret += sprintf(buf, "%sEntry: %d\n", buf, i); - ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", - buf, - reg & 0x80000000 ? "Disabled" : "Allowed", - reg & 0x40000000 ? "Disabled" : "Allowed"); - ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", - buf, (reg & 0x30000) >> 16, reg & 0xfff); - } - return ret; +#define SHOW_CACHE_DISABLE(index) \ +static ssize_t \ +show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ +{ \ + return show_cache_disable(this_leaf, buf, index); \ } +SHOW_CACHE_DISABLE(0) +SHOW_CACHE_DISABLE(1) -static ssize_t -store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, - size_t count) +static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, + const char *buf, size_t count, unsigned int index) { - const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); - int node = cpu_to_node(cpumask_first(mask)); - struct pci_dev *dev = NULL; - unsigned int ret, index, val; + int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); + int node = cpu_to_node(cpu); + struct pci_dev *dev = node_to_k8_nb_misc(node); + unsigned long val = 0; if (!this_leaf->can_disable) return -EINVAL; - if (strlen(buf) > 15) - return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; - ret = sscanf(buf, "%x %x", &index, &val); - if (ret != 2) - return -EINVAL; - if (index > 1) + if (!dev) return -EINVAL; - val |= 0xc0000000; - dev = node_to_k8_nb_misc(node); - if (!dev) { - printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); + if (strict_strtoul(buf, 10, &val) < 0) return -EINVAL; - } + val |= 0xc0000000; pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); + return count; +} - return 1; +#define STORE_CACHE_DISABLE(index) \ +static ssize_t \ +store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ + const char *buf, size_t count) \ +{ \ + return store_cache_disable(this_leaf, buf, count, index); \ } +STORE_CACHE_DISABLE(0) +STORE_CACHE_DISABLE(1) struct _cache_attr { struct attribute attr; @@ -785,7 +781,10 @@ define_one_ro(size); define_one_ro(shared_cpu_map); define_one_ro(shared_cpu_list); -static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); +static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, + show_cache_disable_0, store_cache_disable_0); +static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, + show_cache_disable_1, store_cache_disable_1); static struct attribute * default_attrs[] = { &type.attr, @@ -797,7 +796,8 @@ static struct attribute * default_attrs[] = { &size.attr, &shared_cpu_map.attr, &shared_cpu_list.attr, - &cache_disable.attr, + &cache_disable_0.attr, + &cache_disable_1.attr, NULL }; -- cgit v0.10.2 From ba518bea2db21c72d44a6cbfd825b026ef9cdcb6 Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 9 Apr 2009 15:24:06 +0200 Subject: x86: cacheinfo: disable L3 ECC scrubbing when L3 cache index is disabled (Use correct mask to zero out bits 24-28 by Andreas) Signed-off-by: Mark Langsdorf Signed-off-by: Andreas Herrmann Cc: Andrew Morton LKML-Reference: <20090409132406.GK31527@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fc28291..d46a849 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -731,6 +731,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, int node = cpu_to_node(cpu); struct pci_dev *dev = node_to_k8_nb_misc(node); unsigned long val = 0; + unsigned int scrubber = 0; if (!this_leaf->can_disable) return -EINVAL; @@ -745,6 +746,11 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, return -EINVAL; val |= 0xc0000000; + + pci_read_config_dword(dev, 0x58, &scrubber); + scrubber &= ~0x1f000000; + pci_write_config_dword(dev, 0x58, scrubber); + pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); wbinvd(); pci_write_config_dword(dev, 0x1BC + index * 4, val); -- cgit v0.10.2 From 2fad2d9bb8310889f3261035b594b4e068b6eb8b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Thu, 9 Apr 2009 15:31:53 +0200 Subject: x86/docs: add description for cache_disable sysfs interface Signed-off-by: Mark Langsdorf Signed-off-by: Andreas Herrmann Cc: Andrew Morton LKML-Reference: <20090409133153.GL31527@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable new file mode 100644 index 0000000..175bb4f --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-cache_disable @@ -0,0 +1,18 @@ +What: /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X +Date: August 2008 +KernelVersion: 2.6.27 +Contact: mark.langsdorf@amd.com +Description: These files exist in every cpu's cache index directories. + There are currently 2 cache_disable_# files in each + directory. Reading from these files on a supported + processor will return that cache disable index value + for that processor and node. Writing to one of these + files will cause the specificed cache index to be disabled. + + Currently, only AMD Family 10h Processors support cache index + disable, and only for their L3 caches. See the BIOS and + Kernel Developer's Guide at + http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf + for formatting information and other details on the + cache index disable. +Users: joachim.deguara@amd.com -- cgit v0.10.2 From f465145235313c451164bdfa9037ac254bf00c9a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:18 +0300 Subject: x86: move x86_quirk_pre_intr_init() to irqinit_32.c Impact: cleanup In preparation for unifying irqinit_{32,64}.c, make x86_quirk_pre_intr_init() local to irqinit_32.c. Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 1a99e6c..58d7091 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip; extern void mask_8259A(void); extern void unmask_8259A(void); -#ifdef CONFIG_X86_32 -extern void init_ISA_irqs(void); -#endif - #endif /* _ASM_X86_I8259_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index bdc2ada..4093d1e 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -33,7 +33,6 @@ struct x86_quirks { int (*setup_ioapic_ids)(void); }; -extern void x86_quirk_pre_intr_init(void); extern void x86_quirk_intr_init(void); extern void x86_quirk_trap_init(void); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 368b0a8..0c0dedc 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -53,7 +53,7 @@ static struct irqaction fpu_irq = { .name = "fpu", }; -void __init init_ISA_irqs(void) +static void __init init_ISA_irqs(void) { int i; @@ -121,6 +121,24 @@ int vector_used_by_percpu_irq(unsigned int vector) /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); +/** + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +static void __init x86_quirk_pre_intr_init(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} + void __init native_init_IRQ(void) { int i; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b415843..523bb69 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -997,24 +997,6 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_X86_32 /** - * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -void __init x86_quirk_pre_intr_init(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} - -/** * x86_quirk_intr_init - post gate setup interrupt initialisation * * Description: -- cgit v0.10.2 From 7371d9fcb88dc9185be9719f64744a339c537a92 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:19 +0300 Subject: x86: move init_ISA_irqs() in irqinit_32.c to match ordering in irqinit_64.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 0c0dedc..c5cb769 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -53,30 +53,6 @@ static struct irqaction fpu_irq = { .name = "fpu", }; -static void __init init_ISA_irqs(void) -{ - int i; - -#ifdef CONFIG_X86_LOCAL_APIC - init_bsp_APIC(); -#endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { - struct irq_desc *desc = irq_to_desc(i); - - desc->status = IRQ_DISABLED; - desc->action = NULL; - desc->depth = 1; - - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - /* * IRQ2 is cascade interrupt to second interrupt controller */ @@ -118,6 +94,30 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } +static void __init init_ISA_irqs(void) +{ + int i; + +#ifdef CONFIG_X86_LOCAL_APIC + init_bsp_APIC(); +#endif + init_8259A(0); + + /* + * 16 old-style INTA-cycle interrupts: + */ + for (i = 0; i < NR_IRQS_LEGACY; i++) { + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } +} + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); -- cgit v0.10.2 From 36290d87f5abf260a543e5b711be4ceed03e6b1a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:20 +0300 Subject: x86: introduce smp_intr_init() in irqinit_32.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index c5cb769..df0aad5 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -121,6 +121,38 @@ static void __init init_ISA_irqs(void) /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); +static void __init smp_intr_init(void) +{ +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for single call function */ + alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, + call_function_single_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); +#endif +} + /** * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors * @@ -158,34 +190,7 @@ void __init native_init_IRQ(void) } -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for single call function */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); -#endif + smp_intr_init(); #ifdef CONFIG_X86_LOCAL_APIC /* self generated IPI for local APIC timer */ -- cgit v0.10.2 From 22813c45228160b07244a7c4ed7580388ac0f33d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:21 +0300 Subject: x86: introduce apic_intr_init() in irqinit_32.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index df0aad5..9ba68c4 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -171,25 +171,8 @@ static void __init x86_quirk_pre_intr_init(void) init_ISA_irqs(); } -void __init native_init_IRQ(void) +static void __init apic_intr_init(void) { - int i; - - /* Execute any quirks before the call gates are initialised: */ - x86_quirk_pre_intr_init(); - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); - } - - smp_intr_init(); #ifdef CONFIG_X86_LOCAL_APIC @@ -208,6 +191,27 @@ void __init native_init_IRQ(void) /* thermal monitor LVT interrupt */ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif +} + +void __init native_init_IRQ(void) +{ + int i; + + /* Execute any quirks before the call gates are initialised: */ + x86_quirk_pre_intr_init(); + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); + } + + apic_intr_init(); if (!acpi_ioapic) setup_irq(2, &irq2); -- cgit v0.10.2 From d3496c85cae22fb7713af6ed542a6aeae8ee4210 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:22 +0300 Subject: x86: use identical loop constructs in 32-bit and 64-bit native_init_IRQ() Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 9ba68c4..1029a18 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -205,7 +205,7 @@ void __init native_init_IRQ(void) * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { /* SYSCALL_VECTOR was reserved in trap_init. */ if (i != SYSCALL_VECTOR) set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 8cd1053..1c8858b 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -159,15 +159,16 @@ void __init native_init_IRQ(void) int i; init_ISA_irqs(); + /* * Cover the whole vector space, no vector can escape * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (vector != IA32_SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { + /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ + if (i != IA32_SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); } apic_intr_init(); -- cgit v0.10.2 From b0096bb0b640d0a7713618b3472fd0f4adf30a96 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:23 +0300 Subject: x86: unify smp_intr_init() in irqinit_{32,64}.h Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 1029a18..ef2528d 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -123,7 +123,8 @@ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); static void __init smp_intr_init(void) { -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) +#ifdef CONFIG_SMP +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -143,14 +144,15 @@ static void __init smp_intr_init(void) /* IPI for generic function call */ alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - /* IPI for single call function */ + /* IPI for generic single function call */ alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); + call_function_single_interrupt); /* Low priority IPI to cleanup after moving an irq */ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif +#endif /* CONFIG_SMP */ } /** diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 1c8858b..9e7c57d 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -107,6 +107,7 @@ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); static void __init smp_intr_init(void) { #ifdef CONFIG_SMP +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -134,6 +135,7 @@ static void __init smp_intr_init(void) set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); #endif +#endif /* CONFIG_SMP */ } static void __init apic_intr_init(void) -- cgit v0.10.2 From 598c73d250ffb112715aa48fb325d79e255be23b Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:24 +0300 Subject: x86: unify init_ISA_irqs() in irqinit_{32,64}.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index ef2528d..4488b71 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -98,7 +98,7 @@ static void __init init_ISA_irqs(void) { int i; -#ifdef CONFIG_X86_LOCAL_APIC +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) init_bsp_APIC(); #endif init_8259A(0); diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 9e7c57d..61c9a92 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -84,9 +84,14 @@ static void __init init_ISA_irqs(void) { int i; +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) init_bsp_APIC(); +#endif init_8259A(0); + /* + * 16 old-style INTA-cycle interrupts: + */ for (i = 0; i < NR_IRQS_LEGACY; i++) { struct irq_desc *desc = irq_to_desc(i); @@ -94,11 +99,8 @@ static void __init init_ISA_irqs(void) desc->action = NULL; desc->depth = 1; - /* - * 16 old-style INTA-cycle interrupts: - */ set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); + handle_level_irq, "XT"); } } -- cgit v0.10.2 From 320fd99672a44ece6d1cd0d838ba31c8ebbf5979 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:25 +0300 Subject: x86: unify native_init_IRQ() in irqinit_{32,64}.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 4488b71..a780de3 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -22,7 +22,7 @@ #include #include - +#ifdef CONFIG_X86_32 /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 * as the irq is unreliable, and exception 16 works correctly @@ -52,6 +52,7 @@ static struct irqaction fpu_irq = { .handler = math_error_irq, .name = "fpu", }; +#endif /* * IRQ2 is cascade interrupt to second interrupt controller @@ -155,24 +156,6 @@ static void __init smp_intr_init(void) #endif /* CONFIG_SMP */ } -/** - * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -static void __init x86_quirk_pre_intr_init(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} - static void __init apic_intr_init(void) { smp_intr_init(); @@ -195,12 +178,36 @@ static void __init apic_intr_init(void) #endif } +#ifdef CONFIG_X86_32 +/** + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +static void __init x86_quirk_pre_intr_init(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} +#endif + void __init native_init_IRQ(void) { int i; +#ifdef CONFIG_X86_32 /* Execute any quirks before the call gates are initialised: */ x86_quirk_pre_intr_init(); +#else + init_ISA_irqs(); +#endif /* * Cover the whole vector space, no vector can escape @@ -208,9 +215,15 @@ void __init native_init_IRQ(void) * 'special' SMP interrupts) */ for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { +#ifdef CONFIG_X86_32 /* SYSCALL_VECTOR was reserved in trap_init. */ if (i != SYSCALL_VECTOR) set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); +#else + /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ + if (i != IA32_SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); +#endif } apic_intr_init(); @@ -218,6 +231,7 @@ void __init native_init_IRQ(void) if (!acpi_ioapic) setup_irq(2, &irq2); +#ifdef CONFIG_X86_32 /* * Call quirks after call gates are initialised (usually add in * the architecture specific gates): @@ -232,4 +246,5 @@ void __init native_init_IRQ(void) setup_irq(FPU_IRQ, &fpu_irq); irq_ctx_init(smp_processor_id()); +#endif } diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 61c9a92..ed50e35 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -39,14 +39,46 @@ * (these are usually mapped into the 0x30-0xff vector range) */ +#ifdef CONFIG_X86_32 /* - * IRQ2 is cascade interrupt to second interrupt controller + * Note that on a 486, we don't want to do a SIGFPE on an irq13 + * as the irq is unreliable, and exception 16 works correctly + * (ie as explained in the intel literature). On a 386, you + * can't use exception 16 due to bad IBM design, so we have to + * rely on the less exact irq13. + * + * Careful.. Not only is IRQ13 unreliable, but it is also + * leads to races. IBM designers who came up with it should + * be shot. + */ + +static irqreturn_t math_error_irq(int cpl, void *dev_id) +{ + outb(0, 0xF0); + if (ignore_fpu_irq || !boot_cpu_data.hard_math) + return IRQ_NONE; + math_error((void __user *)get_irq_regs()->ip); + return IRQ_HANDLED; +} + +/* + * New motherboards sometimes make IRQ 13 be a PCI interrupt, + * so allow interrupt sharing. */ +static struct irqaction fpu_irq = { + .handler = math_error_irq, + .name = "fpu", +}; +#endif +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ static struct irqaction irq2 = { .handler = no_action, .name = "cascade", }; + DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... IRQ0_VECTOR - 1] = -1, [IRQ0_VECTOR] = 0, @@ -158,11 +190,36 @@ static void __init apic_intr_init(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); } +#ifdef CONFIG_X86_32 +/** + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +static void __init x86_quirk_pre_intr_init(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} +#endif + void __init native_init_IRQ(void) { int i; +#ifdef CONFIG_X86_32 + /* Execute any quirks before the call gates are initialised: */ + x86_quirk_pre_intr_init(); +#else init_ISA_irqs(); +#endif /* * Cover the whole vector space, no vector can escape @@ -170,13 +227,36 @@ void __init native_init_IRQ(void) * 'special' SMP interrupts) */ for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { +#ifdef CONFIG_X86_32 + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); +#else /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); +#endif } apic_intr_init(); if (!acpi_ioapic) setup_irq(2, &irq2); + +#ifdef CONFIG_X86_32 + /* + * Call quirks after call gates are initialised (usually add in + * the architecture specific gates): + */ + x86_quirk_intr_init(); + + /* + * External FPU? Set up irq13 if so, for + * original braindamaged IBM FERR coupling. + */ + if (boot_cpu_data.hard_math && !cpu_has_fpu) + setup_irq(FPU_IRQ, &fpu_irq); + + irq_ctx_init(smp_processor_id()); +#endif } -- cgit v0.10.2 From 778838600eb6973bdb6fd11e7f91b43cea4d6f45 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:26 +0300 Subject: x86: unify trivial differences in irqinit_{32,64}.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index a780de3..72ce942 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -1,20 +1,24 @@ +#include #include #include #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -22,6 +26,22 @@ #include #include +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. + * + * (these are usually mapped into the 0x30-0xff vector range) + */ + #ifdef CONFIG_X86_32 /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ed50e35..687b6c3 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -17,11 +17,14 @@ #include #include +#include #include #include #include #include +#include #include +#include /* * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: @@ -136,6 +139,7 @@ static void __init init_ISA_irqs(void) } } +/* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); static void __init smp_intr_init(void) -- cgit v0.10.2 From ab19c25abd14db28d7454f00805ea59f22ed6057 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:27 +0300 Subject: x86: unify apic_intr_init() in irqinit_{32,64}.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 72ce942..f3be5e9 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -180,7 +180,12 @@ static void __init apic_intr_init(void) { smp_intr_init(); -#ifdef CONFIG_X86_LOCAL_APIC +#ifdef CONFIG_X86_64 + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); +#endif + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -192,10 +197,12 @@ static void __init apic_intr_init(void) alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif +#ifdef CONFIG_X86_32 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) /* thermal monitor LVT interrupt */ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif +#endif } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 687b6c3..f3be5e9 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -180,9 +180,12 @@ static void __init apic_intr_init(void) { smp_intr_init(); +#ifdef CONFIG_X86_64 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); +#endif +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* self generated IPI for local APIC timer */ alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); @@ -192,6 +195,14 @@ static void __init apic_intr_init(void) /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif +#endif } #ifdef CONFIG_X86_32 -- cgit v0.10.2 From 31cb45ef2600d47191d51253ec94b5e3f689260d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:28 +0300 Subject: x86: unify irqinit_{32,64}.c into irqinit.c Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 145cce7..16e3acf 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -28,7 +28,7 @@ CFLAGS_paravirt.o := $(nostackp) obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o -obj-y += setup.o i8259.o irqinit_$(BITS).o +obj-y += setup.o i8259.o irqinit.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c new file mode 100644 index 0000000..f3be5e9 --- /dev/null +++ b/arch/x86/kernel/irqinit.c @@ -0,0 +1,277 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: + * (these are usually mapped to vectors 0x30-0x3f) + */ + +/* + * The IO-APIC gives us many more interrupt sources. Most of these + * are unused but an SMP system is supposed to have enough memory ... + * sometimes (mostly wrt. hw bugs) we get corrupted vectors all + * across the spectrum, so we really want to be prepared to get all + * of these. Plus, more powerful systems might have more than 64 + * IO-APIC registers. + * + * (these are usually mapped into the 0x30-0xff vector range) + */ + +#ifdef CONFIG_X86_32 +/* + * Note that on a 486, we don't want to do a SIGFPE on an irq13 + * as the irq is unreliable, and exception 16 works correctly + * (ie as explained in the intel literature). On a 386, you + * can't use exception 16 due to bad IBM design, so we have to + * rely on the less exact irq13. + * + * Careful.. Not only is IRQ13 unreliable, but it is also + * leads to races. IBM designers who came up with it should + * be shot. + */ + +static irqreturn_t math_error_irq(int cpl, void *dev_id) +{ + outb(0, 0xF0); + if (ignore_fpu_irq || !boot_cpu_data.hard_math) + return IRQ_NONE; + math_error((void __user *)get_irq_regs()->ip); + return IRQ_HANDLED; +} + +/* + * New motherboards sometimes make IRQ 13 be a PCI interrupt, + * so allow interrupt sharing. + */ +static struct irqaction fpu_irq = { + .handler = math_error_irq, + .name = "fpu", +}; +#endif + +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .name = "cascade", +}; + +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, + [IRQ1_VECTOR] = 1, + [IRQ2_VECTOR] = 2, + [IRQ3_VECTOR] = 3, + [IRQ4_VECTOR] = 4, + [IRQ5_VECTOR] = 5, + [IRQ6_VECTOR] = 6, + [IRQ7_VECTOR] = 7, + [IRQ8_VECTOR] = 8, + [IRQ9_VECTOR] = 9, + [IRQ10_VECTOR] = 10, + [IRQ11_VECTOR] = 11, + [IRQ12_VECTOR] = 12, + [IRQ13_VECTOR] = 13, + [IRQ14_VECTOR] = 14, + [IRQ15_VECTOR] = 15, + [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 +}; + +int vector_used_by_percpu_irq(unsigned int vector) +{ + int cpu; + + for_each_online_cpu(cpu) { + if (per_cpu(vector_irq, cpu)[vector] != -1) + return 1; + } + + return 0; +} + +static void __init init_ISA_irqs(void) +{ + int i; + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + init_bsp_APIC(); +#endif + init_8259A(0); + + /* + * 16 old-style INTA-cycle interrupts: + */ + for (i = 0; i < NR_IRQS_LEGACY; i++) { + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); + } +} + +/* Overridden in paravirt.c */ +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); + +static void __init smp_intr_init(void) +{ +#ifdef CONFIG_SMP +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper + * IPI, driven by wakeup. + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + + /* IPIs for invalidation */ + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); + alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); + + /* IPI for generic single function call */ + alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, + call_function_single_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); +#endif +#endif /* CONFIG_SMP */ +} + +static void __init apic_intr_init(void) +{ + smp_intr_init(); + +#ifdef CONFIG_X86_64 + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); + alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); +#endif + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + + /* generic IPI for platform specific use */ + alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); + + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +#endif + +#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif +#endif +} + +#ifdef CONFIG_X86_32 +/** + * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors + * + * Description: + * Perform any necessary interrupt initialisation prior to setting up + * the "ordinary" interrupt call gates. For legacy reasons, the ISA + * interrupts should be initialised here if the machine emulates a PC + * in any way. + **/ +static void __init x86_quirk_pre_intr_init(void) +{ + if (x86_quirks->arch_pre_intr_init) { + if (x86_quirks->arch_pre_intr_init()) + return; + } + init_ISA_irqs(); +} +#endif + +void __init native_init_IRQ(void) +{ + int i; + +#ifdef CONFIG_X86_32 + /* Execute any quirks before the call gates are initialised: */ + x86_quirk_pre_intr_init(); +#else + init_ISA_irqs(); +#endif + + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become + * 'special' SMP interrupts) + */ + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { +#ifdef CONFIG_X86_32 + /* SYSCALL_VECTOR was reserved in trap_init. */ + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); +#else + /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ + if (i != IA32_SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); +#endif + } + + apic_intr_init(); + + if (!acpi_ioapic) + setup_irq(2, &irq2); + +#ifdef CONFIG_X86_32 + /* + * Call quirks after call gates are initialised (usually add in + * the architecture specific gates): + */ + x86_quirk_intr_init(); + + /* + * External FPU? Set up irq13 if so, for + * original braindamaged IBM FERR coupling. + */ + if (boot_cpu_data.hard_math && !cpu_has_fpu) + setup_irq(FPU_IRQ, &fpu_irq); + + irq_ctx_init(smp_processor_id()); +#endif +} diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c deleted file mode 100644 index f3be5e9..0000000 --- a/arch/x86/kernel/irqinit_32.c +++ /dev/null @@ -1,277 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - -#ifdef CONFIG_X86_32 -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. - */ - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - outb(0, 0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error((void __user *)get_irq_regs()->ip); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { - .handler = math_error_irq, - .name = "fpu", -}; -#endif - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .name = "cascade", -}; - -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 -}; - -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - -static void __init init_ISA_irqs(void) -{ - int i; - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - init_bsp_APIC(); -#endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { - struct irq_desc *desc = irq_to_desc(i); - - desc->status = IRQ_DISABLED; - desc->action = NULL; - desc->depth = 1; - - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -static void __init smp_intr_init(void) -{ -#ifdef CONFIG_SMP -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for generic single function call */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); -#endif -#endif /* CONFIG_SMP */ -} - -static void __init apic_intr_init(void) -{ - smp_intr_init(); - -#ifdef CONFIG_X86_64 - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); -#endif - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* generic IPI for platform specific use */ - alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); -#endif - -#ifdef CONFIG_X86_32 -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) - /* thermal monitor LVT interrupt */ - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -#endif -} - -#ifdef CONFIG_X86_32 -/** - * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -static void __init x86_quirk_pre_intr_init(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} -#endif - -void __init native_init_IRQ(void) -{ - int i; - -#ifdef CONFIG_X86_32 - /* Execute any quirks before the call gates are initialised: */ - x86_quirk_pre_intr_init(); -#else - init_ISA_irqs(); -#endif - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { -#ifdef CONFIG_X86_32 - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); -#else - /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ - if (i != IA32_SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); -#endif - } - - apic_intr_init(); - - if (!acpi_ioapic) - setup_irq(2, &irq2); - -#ifdef CONFIG_X86_32 - /* - * Call quirks after call gates are initialised (usually add in - * the architecture specific gates): - */ - x86_quirk_intr_init(); - - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. - */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -#endif -} diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c deleted file mode 100644 index f3be5e9..0000000 --- a/arch/x86/kernel/irqinit_64.c +++ /dev/null @@ -1,277 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x30-0x3f) - */ - -/* - * The IO-APIC gives us many more interrupt sources. Most of these - * are unused but an SMP system is supposed to have enough memory ... - * sometimes (mostly wrt. hw bugs) we get corrupted vectors all - * across the spectrum, so we really want to be prepared to get all - * of these. Plus, more powerful systems might have more than 64 - * IO-APIC registers. - * - * (these are usually mapped into the 0x30-0xff vector range) - */ - -#ifdef CONFIG_X86_32 -/* - * Note that on a 486, we don't want to do a SIGFPE on an irq13 - * as the irq is unreliable, and exception 16 works correctly - * (ie as explained in the intel literature). On a 386, you - * can't use exception 16 due to bad IBM design, so we have to - * rely on the less exact irq13. - * - * Careful.. Not only is IRQ13 unreliable, but it is also - * leads to races. IBM designers who came up with it should - * be shot. - */ - -static irqreturn_t math_error_irq(int cpl, void *dev_id) -{ - outb(0, 0xF0); - if (ignore_fpu_irq || !boot_cpu_data.hard_math) - return IRQ_NONE; - math_error((void __user *)get_irq_regs()->ip); - return IRQ_HANDLED; -} - -/* - * New motherboards sometimes make IRQ 13 be a PCI interrupt, - * so allow interrupt sharing. - */ -static struct irqaction fpu_irq = { - .handler = math_error_irq, - .name = "fpu", -}; -#endif - -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .name = "cascade", -}; - -DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 -}; - -int vector_used_by_percpu_irq(unsigned int vector) -{ - int cpu; - - for_each_online_cpu(cpu) { - if (per_cpu(vector_irq, cpu)[vector] != -1) - return 1; - } - - return 0; -} - -static void __init init_ISA_irqs(void) -{ - int i; - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - init_bsp_APIC(); -#endif - init_8259A(0); - - /* - * 16 old-style INTA-cycle interrupts: - */ - for (i = 0; i < NR_IRQS_LEGACY; i++) { - struct irq_desc *desc = irq_to_desc(i); - - desc->status = IRQ_DISABLED; - desc->action = NULL; - desc->depth = 1; - - set_irq_chip_and_handler_name(i, &i8259A_chip, - handle_level_irq, "XT"); - } -} - -/* Overridden in paravirt.c */ -void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); - -static void __init smp_intr_init(void) -{ -#ifdef CONFIG_SMP -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - /* - * The reschedule interrupt is a CPU-to-CPU reschedule-helper - * IPI, driven by wakeup. - */ - alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); - - /* IPIs for invalidation */ - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); - alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); - - /* IPI for generic function call */ - alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - - /* IPI for generic single function call */ - alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, - call_function_single_interrupt); - - /* Low priority IPI to cleanup after moving an irq */ - set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); - set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); -#endif -#endif /* CONFIG_SMP */ -} - -static void __init apic_intr_init(void) -{ - smp_intr_init(); - -#ifdef CONFIG_X86_64 - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); - alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); -#endif - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - /* self generated IPI for local APIC timer */ - alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); - - /* generic IPI for platform specific use */ - alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); - - /* IPI vectors for APIC spurious and error interrupts */ - alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); - alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); -#endif - -#ifdef CONFIG_X86_32 -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) - /* thermal monitor LVT interrupt */ - alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -#endif -} - -#ifdef CONFIG_X86_32 -/** - * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors - * - * Description: - * Perform any necessary interrupt initialisation prior to setting up - * the "ordinary" interrupt call gates. For legacy reasons, the ISA - * interrupts should be initialised here if the machine emulates a PC - * in any way. - **/ -static void __init x86_quirk_pre_intr_init(void) -{ - if (x86_quirks->arch_pre_intr_init) { - if (x86_quirks->arch_pre_intr_init()) - return; - } - init_ISA_irqs(); -} -#endif - -void __init native_init_IRQ(void) -{ - int i; - -#ifdef CONFIG_X86_32 - /* Execute any quirks before the call gates are initialised: */ - x86_quirk_pre_intr_init(); -#else - init_ISA_irqs(); -#endif - - /* - * Cover the whole vector space, no vector can escape - * us. (some of these will be overridden and become - * 'special' SMP interrupts) - */ - for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { -#ifdef CONFIG_X86_32 - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); -#else - /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ - if (i != IA32_SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); -#endif - } - - apic_intr_init(); - - if (!acpi_ioapic) - setup_irq(2, &irq2); - -#ifdef CONFIG_X86_32 - /* - * Call quirks after call gates are initialised (usually add in - * the architecture specific gates): - */ - x86_quirk_intr_init(); - - /* - * External FPU? Set up irq13 if so, for - * original braindamaged IBM FERR coupling. - */ - if (boot_cpu_data.hard_math && !cpu_has_fpu) - setup_irq(FPU_IRQ, &fpu_irq); - - irq_ctx_init(smp_processor_id()); -#endif -} -- cgit v0.10.2 From ac3048dfd4740becf8d768844cf47ebee363c9f8 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:29 +0300 Subject: x86: define IA32_SYSCALL_VECTOR on 32-bit to reduce ifdefs Impact: cleanup We can remove some #ifdefs if we define IA32_SYSCALL_VECTOR on 32-bit. Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79b..910b5a3 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -34,6 +34,7 @@ #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 +# define IA32_SYSCALL_VECTOR 0x80 #else # define IA32_SYSCALL_VECTOR 0x80 #endif diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f3be5e9..f2c60a5 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -242,15 +242,9 @@ void __init native_init_IRQ(void) * 'special' SMP interrupts) */ for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { -#ifdef CONFIG_X86_32 - /* SYSCALL_VECTOR was reserved in trap_init. */ - if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); -#else /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ if (i != IA32_SYSCALL_VECTOR) set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); -#endif } apic_intr_init(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d2883..2310700 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -969,11 +969,8 @@ void __init trap_init(void) for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) set_bit(i, used_vectors); -#ifdef CONFIG_X86_64 set_bit(IA32_SYSCALL_VECTOR, used_vectors); -#else - set_bit(SYSCALL_VECTOR, used_vectors); -#endif + /* * Should be a barrier for any external CPU state: */ -- cgit v0.10.2 From abdb5a5713330e17dfe91ab0d3e29c4744d95162 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 9 Apr 2009 11:52:30 +0300 Subject: x86: remove some ifdefs from native_init_IRQ() Impact: cleanup Reviewed-by Cyrill Gorcunov Signed-off-by: Pekka Enberg Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f2c60a5..6269772 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -205,7 +205,6 @@ static void __init apic_intr_init(void) #endif } -#ifdef CONFIG_X86_32 /** * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors * @@ -217,24 +216,21 @@ static void __init apic_intr_init(void) **/ static void __init x86_quirk_pre_intr_init(void) { +#ifdef CONFIG_X86_32 if (x86_quirks->arch_pre_intr_init) { if (x86_quirks->arch_pre_intr_init()) return; } +#endif init_ISA_irqs(); } -#endif void __init native_init_IRQ(void) { int i; -#ifdef CONFIG_X86_32 /* Execute any quirks before the call gates are initialised: */ x86_quirk_pre_intr_init(); -#else - init_ISA_irqs(); -#endif /* * Cover the whole vector space, no vector can escape -- cgit v0.10.2 From 6265ff19ca08df0d96c859ae5e4dc2d9ad07070e Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 9 Apr 2009 15:47:10 +0200 Subject: x86: cacheinfo: complete L2/L3 Cache and TLB associativity field definitions See "CPUID Specification" (AMD Publication #: 25481, Rev. 2.28, April 2008) Signed-off-by: Andreas Herrmann Cc: Mark Langsdorf LKML-Reference: <20090409134710.GA8026@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index d46a849..789efe2 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -200,10 +200,17 @@ union l3_cache { }; static const unsigned short __cpuinitconst assocs[] = { - [1] = 1, [2] = 2, [4] = 4, [6] = 8, - [8] = 16, [0xa] = 32, [0xb] = 48, + [1] = 1, + [2] = 2, + [4] = 4, + [6] = 8, + [8] = 16, + [0xa] = 32, + [0xb] = 48, [0xc] = 64, - [0xf] = 0xffff // ?? + [0xd] = 96, + [0xe] = 128, + [0xf] = 0xffff /* fully associative - no way to show this currently */ }; static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; @@ -264,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; if (leaf == 3) - eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; + eax->split.num_threads_sharing = + current_cpu_data.x86_max_cores - 1; else eax->split.num_threads_sharing = 0; eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; -- cgit v0.10.2 From 47f16ca7631f9c6bad8e6d968cfb1433029b09ec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 10 Apr 2009 14:58:05 +0200 Subject: x86, irqinit: preempt merge conflicts To make the topic merge life easier for tip:perfcounters/core, include two (inactive in this topic) IRQ vector initializations here. Also fix build bug - missing kprobes.h inclusion. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 6269772..b424c32 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -195,6 +196,13 @@ static void __init apic_intr_init(void) /* IPI vectors for APIC spurious and error interrupts */ alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + + /* Performance monitoring interrupts: */ +# ifdef CONFIG_PERF_COUNTERS + alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); + alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); +# endif + #endif #ifdef CONFIG_X86_32 -- cgit v0.10.2 From a5a2a0c7fa039c59619bc908b3b1ed24734d442a Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Fri, 10 Apr 2009 09:50:05 -0700 Subject: futex: fix futex_wait_setup key handling If the get_futex_key() call were to fail, the existing code would try and put_futex_key() prior to returning. This patch makes sure we only put_futex_key() if get_futex_key() succeeded. Reported-by: Clark Williams Signed-off-by: Darren Hart LKML-Reference: <20090410165005.14342.16973.stgit@Aeon> Signed-off-by: Thomas Gleixner diff --git a/kernel/futex.c b/kernel/futex.c index 041bf3a..6d2daa4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1668,7 +1668,7 @@ retry: q->key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q->key); if (unlikely(ret != 0)) - goto out; + return ret; retry_private: *hb = queue_lock(q); -- cgit v0.10.2 From 295c01f90db2b8b8394e1f7cde59b96d71f98e07 Mon Sep 17 00:00:00 2001 From: Christian Hohnstaedt Date: Sun, 12 Apr 2009 13:01:44 +0800 Subject: crypto: ixp4xx - check firmware for crypto support - the loaded firmware may not support crypto at all or only support DES and 3DES but not AES or support DES, 3DES and AES. - in case of no crypto support of the firmware, the module load will fail. - in case of missing AES support, the AES algorithms are not registered and a warning is printed during module load. Signed-off-by: Christian Hohnstaedt Signed-off-by: Herbert Xu diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index af9761c..9224c1f 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -415,6 +415,7 @@ static void crypto_done_action(unsigned long arg) static int init_ixp_crypto(void) { int ret = -ENODEV; + u32 msg[2] = { 0, 0 }; if (! ( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH | IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) { @@ -426,9 +427,35 @@ static int init_ixp_crypto(void) return ret; if (!npe_running(npe_c)) { - npe_load_firmware(npe_c, npe_name(npe_c), dev); + ret = npe_load_firmware(npe_c, npe_name(npe_c), dev); + if (ret) { + return ret; + } + if (npe_recv_message(npe_c, msg, "STATUS_MSG")) + goto npe_error; + } else { + if (npe_send_message(npe_c, msg, "STATUS_MSG")) + goto npe_error; + + if (npe_recv_message(npe_c, msg, "STATUS_MSG")) + goto npe_error; } + switch ((msg[1]>>16) & 0xff) { + case 3: + printk(KERN_WARNING "Firmware of %s lacks AES support\n", + npe_name(npe_c)); + support_aes = 0; + break; + case 4: + case 5: + support_aes = 1; + break; + default: + printk(KERN_ERR "Firmware of %s lacks crypto support\n", + npe_name(npe_c)); + return -ENODEV; + } /* buffer_pool will also be used to sometimes store the hmac, * so assure it is large enough */ @@ -459,6 +486,10 @@ static int init_ixp_crypto(void) qmgr_enable_irq(RECV_QID); return 0; + +npe_error: + printk(KERN_ERR "%s not responding\n", npe_name(npe_c)); + ret = -EIO; err: if (ctx_pool) dma_pool_destroy(ctx_pool); -- cgit v0.10.2 From cf9972a921470b0a2da7906104bcd540b20e33bf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 11 Apr 2009 22:24:05 -0700 Subject: x86, setup: fix comment in the "glove box" code Impact: Comment change only The glove box is about avoiding problems with *registers* being touched, not *memory*. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S index 22b4b3e..5077937 100644 --- a/arch/x86/boot/bioscall.S +++ b/arch/x86/boot/bioscall.S @@ -10,7 +10,7 @@ /* * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes - * touching memory they shouldn't be. + * touching registers they shouldn't be. */ .code16 -- cgit v0.10.2 From f4c1724f3437ac70d8330968379148c954ca34c7 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Sun, 12 Apr 2009 05:04:43 +0400 Subject: ASoC: n810: replace BUG() with BUG_ON() Signed-off-by: Alexander Beregalov Signed-off-by: Mark Brown diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index a6d1178..e54e1c2 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -383,10 +383,9 @@ static int __init n810_soc_init(void) clk_set_parent(sys_clkout2_src, func96m_clk); clk_set_rate(sys_clkout2, 12000000); - if (gpio_request(N810_HEADSET_AMP_GPIO, "hs_amp") < 0) - BUG(); - if (gpio_request(N810_SPEAKER_AMP_GPIO, "spk_amp") < 0) - BUG(); + BUG_ON((gpio_request(N810_HEADSET_AMP_GPIO, "hs_amp") < 0) || + (gpio_request(N810_SPEAKER_AMP_GPIO, "spk_amp") < 0)); + gpio_direction_output(N810_HEADSET_AMP_GPIO, 0); gpio_direction_output(N810_SPEAKER_AMP_GPIO, 0); -- cgit v0.10.2 From 2de1f33e99cec5fd79542a1d0e26efb9c36a98bb Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 11 Apr 2009 12:55:26 +0530 Subject: x86: apic/x2apic_cluster.c x86_cpu_to_logical_apicid should be static Impact: reduce kernel size a bit, address sparse warning Addresses the problem pointed out by this sparse warning: arch/x86/kernel/apic/x2apic_cluster.c:13:1: warning: symbol 'per_cpu__x86_cpu_to_logical_apicid' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Cc: Suresh Siddha LKML-Reference: <1239434726.4418.24.camel@localhost.localdomain> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 4a903e2..8e4cbb2 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -10,7 +10,7 @@ #include #include -DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { -- cgit v0.10.2 From 56c49951747f250d8398582509e02ae5ce1d36d1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 11 Apr 2009 15:51:19 -0400 Subject: tracing: Add documentation for the power tracer Signed-off-by: "Theodore Ts'o" Acked-by: Arjan van de Ven Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1239479479-2603-4-git-send-email-tytso@mit.edu> Signed-off-by: Ingo Molnar diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt new file mode 100644 index 0000000..cd805e1 --- /dev/null +++ b/Documentation/trace/power.txt @@ -0,0 +1,17 @@ +The power tracer collects detailed information about C-state and P-state +transitions, instead of just looking at the high-level "average" +information. + +There is a helper script found in scrips/tracing/power.pl in the kernel +sources which can be used to parse this information and create a +Scalable Vector Graphics (SVG) picture from the trace data. + +To use this tracer: + + echo 0 > /sys/kernel/debug/tracing/tracing_enabled + echo power > /sys/kernel/debug/tracing/current_tracer + echo 1 > /sys/kernel/debug/tracing/tracing_enabled + sleep 1 + echo 0 > /sys/kernel/debug/tracing/tracing_enabled + cat /sys/kernel/debug/tracing/trace | \ + perl scripts/tracing/power.pl > out.sv -- cgit v0.10.2 From abd41443ac76d3e9c29a8c1d9e9a3312306cc55e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 11 Apr 2009 15:51:18 -0400 Subject: tracing: Document the event tracing system Signed-off-by: "Theodore Ts'o" Cc: Theodore Ts'o Cc: Steven Rostedt LKML-Reference: <1239479479-2603-3-git-send-email-tytso@mit.edu> Signed-off-by: Ingo Molnar diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt new file mode 100644 index 0000000..abdee66 --- /dev/null +++ b/Documentation/trace/events.txt @@ -0,0 +1,135 @@ + Event Tracing + + Documentation written by Theodore Ts'o + +Introduction +============ + +Tracepoints (see Documentation/trace/tracepoints.txt) can be used +without creating custom kernel modules to register probe functions +using the event tracing infrastructure. + +Not all tracepoints can be traced using the event tracing system; +the kernel developer must provide code snippets which define how the +tracing information is saved into the tracing buffer, and how the +the tracing information should be printed. + +Using Event Tracing +=================== + +The events which are available for tracing can be found in the file +/sys/kernel/debug/tracing/available_events. + +To enable a particular event, such as 'sched_wakeup', simply echo it +to /sys/debug/tracing/set_event. For example: + + # echo sched_wakeup > /sys/kernel/debug/tracing/set_event + +[ Note: events can also be enabled/disabled via the 'enabled' toggle + found in the /sys/kernel/tracing/events/ hierarchy of directories. ] + +To disable an event, echo the event name to the set_event file prefixed +with an exclamation point: + + # echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event + +To disable events, echo an empty line to the set_event file: + + # echo > /sys/kernel/debug/tracing/set_event + +The events are organized into subsystems, such as ext4, irq, sched, +etc., and a full event name looks like this: :. The +subsystem name is optional, but it is displayed in the available_events +file. All of the events in a subsystem can be specified via the syntax +":*"; for example, to enable all irq events, you can use the +command: + + # echo 'irq:*' > /sys/kernel/debug/tracing/set_event + +Defining an event-enabled tracepoint +------------------------------------ + +A kernel developer which wishes to define an event-enabled tracepoint +must declare the tracepoint using TRACE_EVENT instead of DECLARE_TRACE. +This is done via two header files in include/trace. For example, to +event-enable the jbd2 subsystem, we must create two files, +include/trace/jbd2.h and include/trace/jbd2_event_types.h. The +include/trace/jbd2.h file should be included by kernel source files that +will have a tracepoint inserted, and might look like this: + +#ifndef _TRACE_JBD2_H +#define _TRACE_JBD2_H + +#include +#include + +#include + +#endif + +In a file that utilizes a jbd2 tracepoint, this header file would be +included. Note that you still have to use DEFINE_TRACE(). So for +example, if fs/jbd2/commit.c planned to use the jbd2_start_commit +tracepoint, it would have the following near the beginning of the file: + +#include + +DEFINE_TRACE(jbd2_start_commit); + +Then in the function that would call the tracepoint, it would call the +tracepoint function. (For more information, please see the tracepoint +documentation in Documentation/trace/tracepoints.txt): + + trace_jbd2_start_commit(journal, commit_transaction); + +The code snippets which allow jbd2_start_commit to be an event-enabled +tracepoint are placed in the file include/trace/jbd2_event_types.h: + +/* use instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM jbd2 + +#include + +TRACE_EVENT(jbd2_start_commit, + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), + TP_ARGS(journal, commit_transaction), + TP_STRUCT__entry( + __array( char, devname, BDEVNAME_SIZE+24 ) + __field( int, transaction ) + ), + TP_fast_assign( + memcpy(__entry->devname, journal->j_devname, BDEVNAME_SIZE+24); + __entry->transaction = commit_transaction->t_tid; + ), + TP_printk("dev %s transaction %d", + __entry->devname, __entry->transaction) +); + +The TP_PROTO and TP_ARGS are unchanged from DECLARE_TRACE. The new +arguments to TRACE_EVENT are TP_STRUCT__entry, TP_fast_assign, and +TP_printk. + +TP_STRUCT__entry defines the data structure which will be stored in the +trace buffer. Normally, fields in __entry will be arrays or simple +types. It is possible to place data structures in __entry --- however, +pointers in the data structure can not be trusted, since they will be +accessed sometime later by TP_printk, and if the data structure contains +fields that will not or cannot be used by TP_printk, this will waste +space in the trace buffer. In general, data structures should be +avoided, unless they do only contain non-pointer types and all of the +fields will be used by TP_printk. + +TP_fast_assign defines the code snippet which saves information into the +__entry data structure, using the passed-in arguments defined in +TP_PROTO and TP_ARGS. + +Finally, TP_printk will print the __entry data structure. At the time +when the code snippet defined by TP_printk is executed, it will not have +access to the TP_ARGS arguments; it can only use the information saved +in the __entry data structure. -- cgit v0.10.2 From 2c1b284e4fa260fd922b9a65c99169e2630c6862 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 11 Apr 2009 00:03:10 +0530 Subject: x86: clean up declarations and variables Impact: cleanup, no code changed - syscalls.h update declarations due to unifications - irq.c declare smp_generic_interrupt() before it gets used - process.c declare sys_fork() and sys_vfork() before they get used - tsc.c rename tsc_khz shadowed variable - apic/probe_32.c declare apic_default before it gets used - apic/nmi.c prev_nmi_count should be unsigned - apic/io_apic.c declare smp_irq_move_cleanup_interrupt() before it gets used - mm/init.c declare direct_gbpages and free_initrd_mem before they get used Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 42f2f83..5773660 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -478,6 +478,9 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); #ifdef CONFIG_X86_32 + +extern struct apic apic_default; + /* * Set up the logical destination ID. * diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b762ea4..be9ae41 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -78,7 +78,11 @@ extern void eisa_set_level_irq(unsigned int irq); /* SMP */ extern void smp_apic_timer_interrupt(struct pt_regs *); extern void smp_spurious_interrupt(struct pt_regs *); +extern void smp_generic_interrupt(struct pt_regs *); extern void smp_error_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_IO_APIC +extern asmlinkage void smp_irq_move_cleanup_interrupt(void); +#endif #ifdef CONFIG_SMP extern void smp_reschedule_interrupt(struct pt_regs *); extern void smp_call_function_interrupt(struct pt_regs *); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 29d96d1..3f8d09d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -503,6 +503,8 @@ static inline int pgd_none(pgd_t pgd) #ifndef __ASSEMBLY__ +extern int direct_gbpages; + /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) { diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 6b87bc6..abde308 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[]; extern void paging_init(void); -#endif /* !__ASSEMBLY__ */ - -#ifndef __ASSEMBLY__ - #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%016lx).\n", \ __FILE__, __LINE__, &(e), pte_val(e)) @@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define update_mmu_cache(vma, address, pte) do { } while (0) -extern int direct_gbpages; - /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 7043408..372b76e 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -1,7 +1,7 @@ /* * syscalls.h - Linux syscall interfaces (arch-specific) * - * Copyright (c) 2008 Jaswinder Singh + * Copyright (c) 2008 Jaswinder Singh Rajput * * This file is released under the GPLv2. * See the file COPYING for more details. @@ -12,50 +12,55 @@ #include #include -#include #include +#include /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); +/* kernel/process.c */ +int sys_fork(struct pt_regs *); +int sys_vfork(struct pt_regs *); + /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); +/* kernel/signal.c */ +long sys_rt_sigreturn(struct pt_regs *); + /* kernel/tls.c */ asmlinkage int sys_set_thread_area(struct user_desc __user *); asmlinkage int sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ #ifdef CONFIG_X86_32 +/* kernel/ioport.c */ +long sys_iopl(struct pt_regs *); + /* kernel/process_32.c */ -int sys_fork(struct pt_regs *); int sys_clone(struct pt_regs *); -int sys_vfork(struct pt_regs *); int sys_execve(struct pt_regs *); -/* kernel/signal_32.c */ +/* kernel/signal.c */ asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); int sys_sigaltstack(struct pt_regs *); unsigned long sys_sigreturn(struct pt_regs *); -long sys_rt_sigreturn(struct pt_regs *); - -/* kernel/ioport.c */ -long sys_iopl(struct pt_regs *); /* kernel/sys_i386_32.c */ +struct mmap_arg_struct; +struct sel_arg_struct; +struct oldold_utsname; +struct old_utsname; + asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -struct mmap_arg_struct; asmlinkage int old_mmap(struct mmap_arg_struct __user *); -struct sel_arg_struct; asmlinkage int old_select(struct sel_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); -struct old_utsname; asmlinkage int sys_uname(struct old_utsname __user *); -struct oldold_utsname; asmlinkage int sys_olduname(struct oldold_utsname __user *); /* kernel/vm86_32.c */ @@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *); #else /* CONFIG_X86_32 */ /* X86_64 only */ +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned int, struct pt_regs *); + /* kernel/process_64.c */ -asmlinkage long sys_fork(struct pt_regs *); asmlinkage long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); -asmlinkage long sys_vfork(struct pt_regs *); asmlinkage long sys_execve(char __user *, char __user * __user *, char __user * __user *, struct pt_regs *); long sys_arch_prctl(int, unsigned long); -/* kernel/ioport.c */ -asmlinkage long sys_iopl(unsigned int, struct pt_regs *); - -/* kernel/signal_64.c */ +/* kernel/signal.c */ asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, struct pt_regs *); -long sys_rt_sigreturn(struct pt_regs *); /* kernel/sys_x86_64.c */ +struct new_utsname; + asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -struct new_utsname; asmlinkage long sys_uname(struct new_utsname __user *); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 767fe7e..870c92d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index d6bd624..0205631 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data) } #endif -static void report_broken_nmi(int cpu, int *prev_nmi_count) +static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) { printk(KERN_CONT "\n"); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 01eda2a..440a8bc 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -160,7 +160,6 @@ extern struct apic apic_summit; extern struct apic apic_bigsmp; extern struct apic apic_es7000; extern struct apic apic_es7000_cluster; -extern struct apic apic_default; struct apic *apic = &apic_default; EXPORT_SYMBOL_GPL(apic); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3aaf7b9..2188267 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,7 @@ #include #include #include +#include atomic_t irq_err_count; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ca98915..3e21e38 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7a567eb..a8dc0d0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -384,13 +384,13 @@ unsigned long native_calibrate_tsc(void) { u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags, latch, ms, fast_calibrate, tsc_khz; + unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz; int hpet = is_hpet_enabled(), i, loopmin; - tsc_khz = get_hypervisor_tsc_freq(); - if (tsc_khz) { + hv_tsc_khz = get_hypervisor_tsc_freq(); + if (hv_tsc_khz) { printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); - return tsc_khz; + return hv_tsc_khz; } local_irq_save(flags); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fd3da1d..40924e4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1,3 +1,4 @@ +#include #include #include -- cgit v0.10.2 From 02af61bb50f5d5f0322dbe5ab2a0d75808d25c7b Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 10 Apr 2009 14:26:18 +0800 Subject: tracing, kmemtrace: Separate include/trace/kmemtrace.h to kmemtrace part and tracepoint part Impact: refactor code for future changes Current kmemtrace.h is used both as header file of kmemtrace and kmem's tracepoints definition. Tracepoints' definition file may be used by other code, and should only have definition of tracepoint. We can separate include/trace/kmemtrace.h into 2 files: include/linux/kmemtrace.h: header file for kmemtrace include/trace/kmem.h: definition of kmem tracepoints Signed-off-by: Zhao Lei Acked-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DEE68A.5040902@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h new file mode 100644 index 0000000..15c45a2 --- /dev/null +++ b/include/linux/kmemtrace.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2008 Eduard - Gabriel Munteanu + * + * This file is released under GPL version 2. + */ + +#ifndef _LINUX_KMEMTRACE_H +#define _LINUX_KMEMTRACE_H + +#ifdef __KERNEL__ + +#include + +#ifdef CONFIG_KMEMTRACE +extern void kmemtrace_init(void); +#else +static inline void kmemtrace_init(void) +{ +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_KMEMTRACE_H */ + diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 5ac9b0b..713f841 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -14,7 +14,7 @@ #include /* kmalloc_sizes.h needs PAGE_SIZE */ #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ #include -#include +#include /* Size description struct for general caches. */ struct cache_sizes { diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 5046f90..be5d40c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ diff --git a/include/trace/kmem.h b/include/trace/kmem.h new file mode 100644 index 0000000..24d2519 --- /dev/null +++ b/include/trace/kmem.h @@ -0,0 +1,44 @@ +#ifndef _TRACE_KMEM_H +#define _TRACE_KMEM_H + +#include +#include + +DECLARE_TRACE(kmalloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmem_cache_alloc, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); +DECLARE_TRACE(kmalloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kmem_cache_alloc_node, + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); +DECLARE_TRACE(kfree, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); +DECLARE_TRACE(kmem_cache_free, + TP_PROTO(unsigned long call_site, const void *ptr), + TP_ARGS(call_site, ptr)); + +#endif /* _TRACE_KMEM_H */ diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h deleted file mode 100644 index 28ee69f..0000000 --- a/include/trace/kmemtrace.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (C) 2008 Eduard - Gabriel Munteanu - * - * This file is released under GPL version 2. - */ - -#ifndef _LINUX_KMEMTRACE_H -#define _LINUX_KMEMTRACE_H - -#ifdef __KERNEL__ - -#include -#include - -#ifdef CONFIG_KMEMTRACE -extern void kmemtrace_init(void); -#else -static inline void kmemtrace_init(void) -{ -} -#endif - -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_KMEMTRACE_H */ - diff --git a/init/main.c b/init/main.c index 3585f07..eece40c 100644 --- a/init/main.c +++ b/init/main.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -71,7 +72,6 @@ #include #include #include -#include #ifdef CONFIG_X86_LOCAL_APIC #include diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 5011f4d..7a0aa0e 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include "trace_output.h" #include "trace.h" diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f76a8f8..34b94c3 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include enum trace_type { diff --git a/mm/slab.c b/mm/slab.c index 9a90b00..f85831d 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -102,7 +102,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/slob.c b/mm/slob.c index a2d4ab3..494f05f 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -65,7 +65,7 @@ #include #include #include -#include +#include #include /* diff --git a/mm/slub.c b/mm/slub.c index 7ab54ec..ea9e716 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v0.10.2 From fc182a4330fc22ea1b68fa3d5064dd85a73a4c4a Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 10 Apr 2009 14:27:38 +0800 Subject: tracing, kmemtrace: Make kmem tracepoints use TRACE_EVENT macro TRACE_EVENT is a more generic way to define tracepoints. Doing so adds these new capabilities to this tracepoint: - zero-copy and per-cpu splice() tracing - binary tracing without printf overhead - structured logging records exposed under /debug/tracing/events - trace events embedded in function tracer output and other plugins - user-defined, per tracepoint filter expressions Signed-off-by: Zhao Lei Acked-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49DEE6DA.80600@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/kmem.h b/include/trace/kmem.h index 24d2519..46efc24 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -1,44 +1,9 @@ #ifndef _TRACE_KMEM_H #define _TRACE_KMEM_H -#include #include +#include -DECLARE_TRACE(kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmem_cache_alloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); -DECLARE_TRACE(kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kmem_cache_alloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); -DECLARE_TRACE(kfree, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); -DECLARE_TRACE(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr)); +#include #endif /* _TRACE_KMEM_H */ diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h new file mode 100644 index 0000000..4ff420f --- /dev/null +++ b/include/trace/kmem_event_types.h @@ -0,0 +1,193 @@ + +/* use instead */ +#ifndef TRACE_EVENT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h index 33b6bfc..552a50e 100644 --- a/include/trace/trace_event_types.h +++ b/include/trace/trace_event_types.h @@ -4,3 +4,4 @@ #include #include #include +#include diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 0e2aa80..13d6b85 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -4,3 +4,4 @@ #include #include #include +#include -- cgit v0.10.2 From b78825d608f30a47e3154ab6872a03f0de0c9d45 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 1 Apr 2009 16:18:53 +0800 Subject: blktrace: fix output of unknown events Not all events are pc (packet command) events. An event is a pc event only if it has BLK_TC_PC bit set. Signed-off-by: Li Zefan Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <49D3236D.3090705@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 921ef5d..e45e1af 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1182,7 +1182,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, } if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) - ret = trace_seq_printf(s, "Bad pc action %x\n", what); + ret = trace_seq_printf(s, "Unknown action %x\n", what); else { ret = log_action(iter, what2act[what].act[long_act]); if (ret) -- cgit v0.10.2 From 66de7792c02693b49671afe58c771fde3b092fc7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 1 Apr 2009 16:19:19 +0800 Subject: blktrace: fix output of BLK_TC_PC events BLK_TC_PC events should be treated differently with BLK_TC_FS events. Before this patch: # echo 1 > /sys/block/sda/sda1/trace/enable # echo pc > /sys/block/sda/sda1/trace/act_mask # echo blk > /debugfs/tracing/current_tracer # (generate some BLK_TC_PC events) # cat trace bash-2184 [000] 1774.275413: 8,7 I N [bash] bash-2184 [000] 1774.275435: 8,7 D N [bash] bash-2184 [000] 1774.275540: 8,7 I R [bash] bash-2184 [000] 1774.275547: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275580: 8,7 C N 0 [0] bash-2184 [000] 1774.275648: 8,7 I R [bash] bash-2184 [000] 1774.275653: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275682: 8,7 C N 0 [0] bash-2184 [000] 1774.275739: 8,7 I R [bash] bash-2184 [000] 1774.275744: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275771: 8,7 C N 0 [0] bash-2184 [000] 1774.275804: 8,7 I R [bash] bash-2184 [000] 1774.275808: 8,7 D R [bash] ksoftirqd/0-4 [000] 1774.275836: 8,7 C N 0 [0] After this patch: # cat trace bash-2263 [000] 366.782149: 8,7 I N 0 (00 ..) [bash] bash-2263 [000] 366.782323: 8,7 D N 0 (00 ..) [bash] bash-2263 [000] 366.782557: 8,7 I R 8 (25 00 ..) [bash] bash-2263 [000] 366.782560: 8,7 D R 8 (25 00 ..) [bash] ksoftirqd/0-4 [000] 366.782582: 8,7 C N (25 00 ..) [0] bash-2263 [000] 366.782648: 8,7 I R 8 (5a 00 3f 00) [bash] bash-2263 [000] 366.782650: 8,7 D R 8 (5a 00 3f 00) [bash] ksoftirqd/0-4 [000] 366.782669: 8,7 C N (5a 00 3f 00) [0] bash-2263 [000] 366.782710: 8,7 I R 8 (5a 00 08 00) [bash] bash-2263 [000] 366.782713: 8,7 D R 8 (5a 00 08 00) [bash] ksoftirqd/0-4 [000] 366.782730: 8,7 C N (5a 00 08 00) [0] bash-2263 [000] 366.783375: 8,7 I R 36 (5a 00 08 00) [bash] bash-2263 [000] 366.783379: 8,7 D R 36 (5a 00 08 00) [bash] ksoftirqd/0-4 [000] 366.783404: 8,7 C N (5a 00 08 00) [0] This is what we do with PC events in user-space blktrace. Signed-off-by: Li Zefan Acked-by: Arnaldo Carvalho de Melo Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <49D32387.9040106@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e45e1af..2b98195 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -971,6 +971,16 @@ static inline const void *pdu_start(const struct trace_entry *ent) return te_blk_io_trace(ent) + 1; } +static inline u32 t_action(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->action; +} + +static inline u32 t_bytes(const struct trace_entry *ent) +{ + return te_blk_io_trace(ent)->bytes; +} + static inline u32 t_sec(const struct trace_entry *ent) { return te_blk_io_trace(ent)->bytes >> 9; @@ -1031,25 +1041,87 @@ static int blk_log_action(struct trace_iterator *iter, const char *act) MAJOR(t->device), MINOR(t->device), act, rwbs); } +static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) +{ + const char *pdu_buf; + int pdu_len; + int i, end, ret; + + pdu_buf = pdu_start(ent); + pdu_len = te_blk_io_trace(ent)->pdu_len; + + if (!pdu_len) + return 1; + + /* find the last zero that needs to be printed */ + for (end = pdu_len - 1; end >= 0; end--) + if (pdu_buf[end]) + break; + end++; + + if (!trace_seq_putc(s, '(')) + return 0; + + for (i = 0; i < pdu_len; i++) { + + ret = trace_seq_printf(s, "%s%02x", + i == 0 ? "" : " ", pdu_buf[i]); + if (!ret) + return ret; + + /* + * stop when the rest is just zeroes and indicate so + * with a ".." appended + */ + if (i == end && end != pdu_len - 1) + return trace_seq_puts(s, " ..) "); + } + + return trace_seq_puts(s, ") "); +} + static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); - if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%s]\n", - t_sector(ent), t_sec(ent), cmd); - return trace_seq_printf(s, "[%s]\n", cmd); + if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { + int ret; + + ret = trace_seq_printf(s, "%u ", t_bytes(ent)); + if (!ret) + return 0; + ret = blk_log_dump_pdu(s, ent); + if (!ret) + return 0; + return trace_seq_printf(s, "[%s]\n", cmd); + } else { + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%s]\n", + t_sector(ent), t_sec(ent), cmd); + return trace_seq_printf(s, "[%s]\n", cmd); + } } static int blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent) { - if (t_sec(ent)) - return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), - t_sec(ent), t_error(ent)); - return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); + if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { + int ret; + + ret = blk_log_dump_pdu(s, ent); + if (ret) + return trace_seq_printf(s, "[%d]\n", t_error(ent)); + return 0; + } else { + if (t_sec(ent)) + return trace_seq_printf(s, "%llu + %u [%d]\n", + t_sector(ent), + t_sec(ent), t_error(ent)); + return trace_seq_printf(s, "%llu [%d]\n", + t_sector(ent), t_error(ent)); + } } static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) -- cgit v0.10.2 From 3fa89ca7ba5ba50b3924a11f6604b4bdce5f7842 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 12 Apr 2009 20:37:25 +0530 Subject: x86: vdso/vma.c declare vdso_enabled and arch_setup_additional_pages before they get used Impact: cleanup, address sparse warnings Addresses the problem pointed out by these sparse warning: arch/x86/vdso/vma.c:19:28: warning: symbol 'vdso_enabled' was not declared. Should it be static? arch/x86/vdso/vma.c:101:5: warning: symbol 'arch_setup_additional_pages' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput LKML-Reference: <1239548845.4170.2.camel@localhost.localdomain> Signed-off-by: Ingo Molnar diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 7133cdf..cac0833 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From edea7148a87c099e5d5d4838285cc27e459588b7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 12 Apr 2009 20:47:39 +0400 Subject: x86: irq.c - tiny cleanup Impact: cleanup, robustization 1) guard ack_bad_irq with printk_ratelimit since there is no guarantee we will not be flooded one day 2) use pr_emerg() helper Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090412165058.277579847@openvz.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3aaf7b9..6603492 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -24,7 +24,8 @@ void (*generic_interrupt_extension)(void) = NULL; */ void ack_bad_irq(unsigned int irq) { - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); + if (printk_ratelimit()) + pr_err("unexpected IRQ trap at vector %02x\n", irq); #ifdef CONFIG_X86_LOCAL_APIC /* @@ -178,7 +179,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_thermal_count; # ifdef CONFIG_X86_64 sum += irq_stats(cpu)->irq_threshold_count; -#endif +# endif #endif return sum; } @@ -219,8 +220,8 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) #endif if (printk_ratelimit()) - printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", - __func__, smp_processor_id(), vector, irq); + pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", + __func__, smp_processor_id(), vector, irq); } irq_exit(); -- cgit v0.10.2 From c0eaa4536f08b98fbcfa7fce5b7b0de1bebcb0e1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 12 Apr 2009 20:47:40 +0400 Subject: x86: apic - introduce imcr_ helpers Impact: cleanup Distinguish port writting magic into helpers with comments. Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090412165058.535921550@openvz.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 098ec84..c3be10f5 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -98,6 +98,29 @@ early_param("lapic", parse_lapic); /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase; +/* + * Handle interrupt mode configuration register (IMCR). + * This register controls whether the interrupt signals + * that reach the BSP come from the master PIC or from the + * local APIC. Before entering Symmetric I/O Mode, either + * the BIOS or the operating system must switch out of + * PIC Mode by changing the IMCR. + */ +static inline imcr_pic_to_apic(void) +{ + /* select IMCR register */ + outb(0x70, 0x22); + /* NMI and 8259 INTR go through APIC */ + outb(0x01, 0x23); +} + +static inline imcr_apic_to_pic(void) +{ + /* select IMCR register */ + outb(0x70, 0x22); + /* NMI and 8259 INTR go directly to BSP */ + outb(0x00, 0x23); +} #endif #ifdef CONFIG_X86_64 @@ -1727,8 +1750,7 @@ void __init connect_bsp_APIC(void) */ apic_printk(APIC_VERBOSE, "leaving PIC mode, " "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); + imcr_pic_to_apic(); } #endif if (apic->enable_apic_mode) @@ -1756,8 +1778,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) */ apic_printk(APIC_VERBOSE, "disabling APIC mode, " "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); + imcr_apic_to_pic(); return; } #endif -- cgit v0.10.2 From 08306ce61d6848e6fbf74fa4cc693c3fb29e943f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 12 Apr 2009 20:47:41 +0400 Subject: x86: apic - introduce dummy apic operations Impact: refactor, speed up and robustize code In case if apic was disabled by kernel option or by hardware limits we can use dummy operations in apic->write to simplify the ack_APIC_irq() code. At the lame time the patch fixes the missed EOI in do_IRQ function (which has place if kernel is compiled as X86-32 and interrupt without handler happens where apic was not asked to be disabled via kernel option). Note that native_apic_write_dummy() consists of WARN_ON_ONCE to catch any buggy writes on enabled APICs. Could be removed after some time of testing. Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090412165058.724788431@openvz.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 42f2f83..2bd5a46 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -212,6 +212,7 @@ static inline void ack_x2APIC_irq(void) } #endif +extern void apic_disable(void); extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); extern void connect_bsp_APIC(void); @@ -252,7 +253,7 @@ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 static inline void init_apic_mappings(void) { } static inline void disable_local_APIC(void) { } - +static inline void apic_disable(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c3be10f5..9b849d4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -232,6 +232,24 @@ static int modern_apic(void) return lapic_get_version() >= 0x14; } +/* + * bare function to substitute write operation + * and it's _that_ fast :) + */ +void native_apic_write_dummy(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + +/* + * right after this call apic->write doesn't do anything + * note that there is no restore operation it works one way + */ +void apic_disable(void) +{ + apic->write = native_apic_write_dummy; +} + void native_apic_wait_icr_idle(void) { while (apic_read(APIC_ICR) & APIC_ICR_BUSY) @@ -1582,6 +1600,12 @@ void __init init_apic_mappings(void) */ if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = read_apic_id(); + + /* lets check if we may to NOP'ify apic operations */ + if (!cpu_has_apic) { + pr_info("APIC: disable apic facility\n"); + apic_disable(); + } } /* diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 6603492..fd57bf3 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -27,7 +27,6 @@ void ack_bad_irq(unsigned int irq) if (printk_ratelimit()) pr_err("unexpected IRQ trap at vector %02x\n", irq); -#ifdef CONFIG_X86_LOCAL_APIC /* * Currently unexpected vectors happen only on SMP and APIC. * We _must_ ack these because every local APIC has only N @@ -37,9 +36,7 @@ void ack_bad_irq(unsigned int irq) * completely. * But only ack when the APIC is enabled -AK */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif + ack_APIC_irq(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) @@ -214,10 +211,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) irq = __get_cpu_var(vector_irq)[vector]; if (!handle_irq(irq, regs)) { -#ifdef CONFIG_X86_64 - if (!disable_apic) - ack_APIC_irq(); -#endif + ack_APIC_irq(); if (printk_ratelimit()) pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", -- cgit v0.10.2 From b9b34f24b23ba9e79e07c0980e7fff16af2a67d1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 12 Apr 2009 20:47:42 +0400 Subject: x86: smp.c - align smp_ops assignments Impact: cleanup It's a bit hard to parse by eyes without them being aligned. Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090412165058.924175574@openvz.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 13f33ea..f6db48c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -193,19 +193,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) } struct smp_ops smp_ops = { - .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, - .smp_prepare_cpus = native_smp_prepare_cpus, - .smp_cpus_done = native_smp_cpus_done, + .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, + .smp_prepare_cpus = native_smp_prepare_cpus, + .smp_cpus_done = native_smp_cpus_done, - .smp_send_stop = native_smp_send_stop, - .smp_send_reschedule = native_smp_send_reschedule, + .smp_send_stop = native_smp_send_stop, + .smp_send_reschedule = native_smp_send_reschedule, - .cpu_up = native_cpu_up, - .cpu_die = native_cpu_die, - .cpu_disable = native_cpu_disable, - .play_dead = native_play_dead, + .cpu_up = native_cpu_up, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, - .send_call_func_ipi = native_send_call_func_ipi, + .send_call_func_ipi = native_send_call_func_ipi, .send_call_func_single_ipi = native_send_call_func_single_ipi, }; EXPORT_SYMBOL_GPL(smp_ops); -- cgit v0.10.2 From f4976116a98f108bf385f217332aadb3ca98fe66 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Apr 2009 10:53:02 +0100 Subject: ASoC: WM9713 requires symmetric rates on the voice DAI Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 523bad0..aa94cc6 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1069,6 +1069,7 @@ struct snd_soc_dai wm9713_dai[] = { .rates = WM9713_PCM_RATES, .formats = WM9713_PCM_FORMATS,}, .ops = &wm9713_dai_ops_voice, + .symmetric_rates = 1, }, }; EXPORT_SYMBOL_GPL(wm9713_dai); -- cgit v0.10.2 From 025756eca458b4a3d5e3d76baaffb2e8e3df79db Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Apr 2009 11:09:18 +0100 Subject: ASoC: Factor out application of power for generic widgets This is simple code motion, intended to support future refactoring of the DAPM algorithms and (more immediately) the additon of events for DACs and ADCs. Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 46485de..713d125 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -522,6 +522,65 @@ int dapm_reg_event(struct snd_soc_dapm_widget *w, } EXPORT_SYMBOL_GPL(dapm_reg_event); +/* Standard power change method, used to apply power changes to most + * widgets. + */ +static int dapm_generic_apply_power(struct snd_soc_dapm_widget *w) +{ + int ret; + + /* call any power change event handlers */ + if (w->event) + pr_debug("power %s event for %s flags %x\n", + w->power ? "on" : "off", + w->name, w->event_flags); + + /* power up pre event */ + if (w->power && w->event && + (w->event_flags & SND_SOC_DAPM_PRE_PMU)) { + ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMU); + if (ret < 0) + return ret; + } + + /* power down pre event */ + if (!w->power && w->event && + (w->event_flags & SND_SOC_DAPM_PRE_PMD)) { + ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMD); + if (ret < 0) + return ret; + } + + /* Lower PGA volume to reduce pops */ + if (w->id == snd_soc_dapm_pga && !w->power) + dapm_set_pga(w, w->power); + + dapm_update_bits(w); + + /* Raise PGA volume to reduce pops */ + if (w->id == snd_soc_dapm_pga && w->power) + dapm_set_pga(w, w->power); + + /* power up post event */ + if (w->power && w->event && + (w->event_flags & SND_SOC_DAPM_POST_PMU)) { + ret = w->event(w, + NULL, SND_SOC_DAPM_POST_PMU); + if (ret < 0) + return ret; + } + + /* power down post event */ + if (!w->power && w->event && + (w->event_flags & SND_SOC_DAPM_POST_PMD)) { + ret = w->event(w, NULL, SND_SOC_DAPM_POST_PMD); + if (ret < 0) + return ret; + } + + return 0; +} + /* * Scan a single DAPM widget for a complete audio path and update the * power status appropriately. @@ -601,56 +660,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, if (!power_change) return 0; - /* call any power change event handlers */ - if (w->event) - pr_debug("power %s event for %s flags %x\n", - w->power ? "on" : "off", - w->name, w->event_flags); - - /* power up pre event */ - if (power && w->event && - (w->event_flags & SND_SOC_DAPM_PRE_PMU)) { - ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMU); - if (ret < 0) - return ret; - } - - /* power down pre event */ - if (!power && w->event && - (w->event_flags & SND_SOC_DAPM_PRE_PMD)) { - ret = w->event(w, NULL, SND_SOC_DAPM_PRE_PMD); - if (ret < 0) - return ret; - } - - /* Lower PGA volume to reduce pops */ - if (w->id == snd_soc_dapm_pga && !power) - dapm_set_pga(w, power); - - dapm_update_bits(w); - - /* Raise PGA volume to reduce pops */ - if (w->id == snd_soc_dapm_pga && power) - dapm_set_pga(w, power); - - /* power up post event */ - if (power && w->event && - (w->event_flags & SND_SOC_DAPM_POST_PMU)) { - ret = w->event(w, - NULL, SND_SOC_DAPM_POST_PMU); - if (ret < 0) - return ret; - } - - /* power down post event */ - if (!power && w->event && - (w->event_flags & SND_SOC_DAPM_POST_PMD)) { - ret = w->event(w, NULL, SND_SOC_DAPM_POST_PMD); - if (ret < 0) - return ret; - } - - return 0; + return dapm_generic_apply_power(w); } /* -- cgit v0.10.2 From f6d655a6e6974e474a11b25052c29d10b80814b3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Apr 2009 11:27:03 +0100 Subject: ASoC: Support DAPM events for DACs and ADCs Signed-off-by: Mark Brown diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index a7def6a..fcc929d 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -140,9 +140,19 @@ #define SND_SOC_DAPM_DAC(wname, stname, wreg, wshift, winvert) \ { .id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \ .shift = wshift, .invert = winvert} +#define SND_SOC_DAPM_DAC_E(wname, stname, wreg, wshift, winvert, \ + wevent, wflags) \ +{ .id = snd_soc_dapm_dac, .name = wname, .sname = stname, .reg = wreg, \ + .shift = wshift, .invert = winvert, \ + .event = wevent, .event_flags = wflags} #define SND_SOC_DAPM_ADC(wname, stname, wreg, wshift, winvert) \ { .id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \ .shift = wshift, .invert = winvert} +#define SND_SOC_DAPM_ADC_E(wname, stname, wreg, wshift, winvert, \ + wevent, wflags) \ +{ .id = snd_soc_dapm_adc, .name = wname, .sname = stname, .reg = wreg, \ + .shift = wshift, .invert = winvert, \ + .event = wevent, .event_flags = wflags} /* generic register modifier widget */ #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \ diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 713d125..a6d7337 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -598,18 +598,22 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, if (w->id == snd_soc_dapm_adc && w->active) { in = is_connected_input_ep(w); dapm_clear_walk(w->codec); - w->power = (in != 0) ? 1 : 0; - dapm_update_bits(w); - return 0; + power = (in != 0) ? 1 : 0; + if (power == w->power) + return 0; + w->power = power; + return dapm_generic_apply_power(w); } /* active DAC */ if (w->id == snd_soc_dapm_dac && w->active) { out = is_connected_output_ep(w); dapm_clear_walk(w->codec); - w->power = (out != 0) ? 1 : 0; - dapm_update_bits(w); - return 0; + power = (out != 0) ? 1 : 0; + if (power == w->power) + return 0; + w->power = power; + return dapm_generic_apply_power(w); } /* pre and post event widgets */ -- cgit v0.10.2 From 6bbcb459cd50807511491ddf96bca1ef92006bf8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Apr 2009 11:29:10 +0100 Subject: ASoC: Move the WM9713 voice DAC powerdown to a DAPM event This ensures that we sync with the DAPM powerdown sequencing properly and don't need to bounce the power on the voice DAC so often. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index aa94cc6..a6feb784 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -189,6 +189,26 @@ SOC_SINGLE("3D Lower Cut-off Switch", AC97_REC_GAIN_MIC, 4, 1, 0), SOC_SINGLE("3D Depth", AC97_REC_GAIN_MIC, 0, 15, 1), }; +static int wm9713_voice_shutdown(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + u16 status, rate; + + BUG_ON(event != SND_SOC_DAPM_PRE_PMD); + + /* Gracefully shut down the voice interface. */ + status = ac97_read(codec, AC97_EXTENDED_MID) | 0x1000; + rate = ac97_read(codec, AC97_HANDSET_RATE) & 0xF0FF; + ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0200); + schedule_timeout_interruptible(msecs_to_jiffies(1)); + ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0F00); + ac97_write(codec, AC97_EXTENDED_MID, status); + + return 0; +} + + /* We have to create a fake left and right HP mixers because * the codec only has a single control that is shared by both channels. * This makes it impossible to determine the audio path using the current @@ -400,7 +420,8 @@ SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_MIXER("HP Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_MIXER("Line Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_MIXER("Capture Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), -SND_SOC_DAPM_DAC("Voice DAC", "Voice Playback", AC97_EXTENDED_MID, 12, 1), +SND_SOC_DAPM_DAC_E("Voice DAC", "Voice Playback", AC97_EXTENDED_MID, 12, 1, + wm9713_voice_shutdown, SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_DAC("Aux DAC", "Aux Playback", AC97_EXTENDED_MID, 11, 1), SND_SOC_DAPM_PGA("Left ADC", AC97_EXTENDED_MID, 5, 1, NULL, 0), SND_SOC_DAPM_PGA("Right ADC", AC97_EXTENDED_MID, 4, 1, NULL, 0), @@ -936,21 +957,6 @@ static int wm9713_pcm_hw_params(struct snd_pcm_substream *substream, return 0; } -static void wm9713_voiceshutdown(struct snd_pcm_substream *substream, - struct snd_soc_dai *dai) -{ - struct snd_soc_codec *codec = dai->codec; - u16 status, rate; - - /* Gracefully shut down the voice interface. */ - status = ac97_read(codec, AC97_EXTENDED_STATUS) | 0x1000; - rate = ac97_read(codec, AC97_HANDSET_RATE) & 0xF0FF; - ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0200); - schedule_timeout_interruptible(msecs_to_jiffies(1)); - ac97_write(codec, AC97_HANDSET_RATE, rate | 0x0F00); - ac97_write(codec, AC97_EXTENDED_MID, status); -} - static int ac97_hifi_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -1019,7 +1025,6 @@ static struct snd_soc_dai_ops wm9713_dai_ops_aux = { static struct snd_soc_dai_ops wm9713_dai_ops_voice = { .hw_params = wm9713_pcm_hw_params, - .shutdown = wm9713_voiceshutdown, .set_clkdiv = wm9713_set_dai_clkdiv, .set_pll = wm9713_set_dai_pll, .set_fmt = wm9713_set_dai_fmt, -- cgit v0.10.2 From a820532002e70e3a06f1ea7133e9b02443d07382 Mon Sep 17 00:00:00 2001 From: Daniel Ribeiro Date: Wed, 8 Apr 2009 10:51:24 -0300 Subject: ASoC: pxa-ssp.c fix clock/frame invert SCMODE(0): Data Driven (Falling), Data Sampled (Rising), Idle State (Low) SCMODE(1): Data Driven (Rising), Data Sampled (Falling), Idle State (Low) SCMODE(2): Data Driven (Rising), Data Sampled (Falling), Idle State (High) SCMODE(3): Data Driven (Falling), Data Sampled (Rising), Idle State (High) SCMODE(3) does not invert the clock polarity compared to the default SCMODE(0). This patch also adds all possible NF/IF, NB/IB combinations to the DSP_A and DSP_B modes. Signed-off-by: Daniel Ribeiro Signed-off-by: Mark Brown diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index c7c1996..176af7ff 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -568,7 +568,10 @@ static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai, case SND_SOC_DAIFMT_NB_IF: break; case SND_SOC_DAIFMT_IB_IF: - sspsp |= SSPSP_SCMODE(3); + sspsp |= SSPSP_SCMODE(2); + break; + case SND_SOC_DAIFMT_IB_NF: + sspsp |= SSPSP_SCMODE(2) | SSPSP_SFRMP; break; default: return -EINVAL; @@ -585,7 +588,13 @@ static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai, case SND_SOC_DAIFMT_NB_NF: sspsp |= SSPSP_SFRMP; break; + case SND_SOC_DAIFMT_NB_IF: + break; case SND_SOC_DAIFMT_IB_IF: + sspsp |= SSPSP_SCMODE(2); + break; + case SND_SOC_DAIFMT_IB_NF: + sspsp |= SSPSP_SCMODE(2) | SSPSP_SFRMP; break; default: return -EINVAL; -- cgit v0.10.2 From f2644a2c00a06236a9c5e85488b0680825bad39c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 7 Apr 2009 19:20:14 +0100 Subject: ASoC: Add WM8960 CODEC driver The WM8960 is a low power, high quality stereo codec designed for portable digital audio applications. Stereo class D speaker drivers provide 1W per channel into 8W loads. Guaranteed low leakage, excellent PSRR and pop/click suppression mechanisms enable direct battery connection for the speaker supply. The device also integrates a complete microphone interface and a stereo headphone driver. External component requirements are drastically reduced as no separate microphone, speaker or headphone amplifiers are required. Advanced on-chip digital signal processing performs automatic level control for the microphone or line input. Stereo 24-bit sigma-delta ADCs and DACs are used with low power over-sampling digital interpolation and decimation filters and a flexible digital audio interface. The master clock can be input directly or generated internally by an onboard PLL, supporting most commonly-used clocking schemes. This driver was originally written by Liam Girdwood, with substantial subsequent additions and updates for feature completeness and changes in the ASoC framework from me. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index ab36485..121d63f 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -35,6 +35,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_WM8753 if SND_SOC_I2C_AND_SPI select SND_SOC_WM8900 if I2C select SND_SOC_WM8903 if I2C + select SND_SOC_WM8960 if I2C select SND_SOC_WM8971 if I2C select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI select SND_SOC_WM8990 if I2C @@ -139,6 +140,9 @@ config SND_SOC_WM8900 config SND_SOC_WM8903 tristate +config SND_SOC_WM8960 + tristate + config SND_SOC_WM8971 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index a72548d..d8e15a4 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -23,6 +23,7 @@ snd-soc-wm8750-objs := wm8750.o snd-soc-wm8753-objs := wm8753.o snd-soc-wm8900-objs := wm8900.o snd-soc-wm8903-objs := wm8903.o +snd-soc-wm8960-objs := wm8960.o snd-soc-wm8971-objs := wm8971.o snd-soc-wm8988-objs := wm8988.o snd-soc-wm8990-objs := wm8990.o @@ -56,6 +57,7 @@ obj-$(CONFIG_SND_SOC_WM8753) += snd-soc-wm8753.o obj-$(CONFIG_SND_SOC_WM8900) += snd-soc-wm8900.o obj-$(CONFIG_SND_SOC_WM8903) += snd-soc-wm8903.o obj-$(CONFIG_SND_SOC_WM8971) += snd-soc-wm8971.o +obj-$(CONFIG_SND_SOC_WM8960) += snd-soc-wm8960.o obj-$(CONFIG_SND_SOC_WM8988) += snd-soc-wm8988.o obj-$(CONFIG_SND_SOC_WM8990) += snd-soc-wm8990.o obj-$(CONFIG_SND_SOC_WM8991) += snd-soc-wm8991.o diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c new file mode 100644 index 0000000..e224d8a --- /dev/null +++ b/sound/soc/codecs/wm8960.c @@ -0,0 +1,969 @@ +/* + * wm8960.c -- WM8960 ALSA SoC Audio driver + * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "wm8960.h" + +#define AUDIO_NAME "wm8960" + +struct snd_soc_codec_device soc_codec_dev_wm8960; + +/* R25 - Power 1 */ +#define WM8960_VREF 0x40 + +/* R28 - Anti-pop 1 */ +#define WM8960_POBCTRL 0x80 +#define WM8960_BUFDCOPEN 0x10 +#define WM8960_BUFIOEN 0x08 +#define WM8960_SOFT_ST 0x04 +#define WM8960_HPSTBY 0x01 + +/* R29 - Anti-pop 2 */ +#define WM8960_DISOP 0x40 + +/* + * wm8960 register cache + * We can't read the WM8960 register space when we are + * using 2 wire for device control, so we cache them instead. + */ +static const u16 wm8960_reg[WM8960_CACHEREGNUM] = { + 0x0097, 0x0097, 0x0000, 0x0000, + 0x0000, 0x0008, 0x0000, 0x000a, + 0x01c0, 0x0000, 0x00ff, 0x00ff, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x007b, 0x0100, 0x0032, + 0x0000, 0x00c3, 0x00c3, 0x01c0, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, + 0x0100, 0x0100, 0x0050, 0x0050, + 0x0050, 0x0050, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0040, 0x0000, + 0x0000, 0x0050, 0x0050, 0x0000, + 0x0002, 0x0037, 0x004d, 0x0080, + 0x0008, 0x0031, 0x0026, 0x00e9, +}; + +struct wm8960_priv { + u16 reg_cache[WM8960_CACHEREGNUM]; + struct snd_soc_codec codec; +}; + +/* + * read wm8960 register cache + */ +static inline unsigned int wm8960_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + u16 *cache = codec->reg_cache; + if (reg == WM8960_RESET) + return 0; + if (reg >= WM8960_CACHEREGNUM) + return -1; + return cache[reg]; +} + +/* + * write wm8960 register cache + */ +static inline void wm8960_write_reg_cache(struct snd_soc_codec *codec, + u16 reg, unsigned int value) +{ + u16 *cache = codec->reg_cache; + if (reg >= WM8960_CACHEREGNUM) + return; + cache[reg] = value; +} + +static inline unsigned int wm8960_read(struct snd_soc_codec *codec, + unsigned int reg) +{ + return wm8960_read_reg_cache(codec, reg); +} + +/* + * write to the WM8960 register space + */ +static int wm8960_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + u8 data[2]; + + /* data is + * D15..D9 WM8960 register offset + * D8...D0 register data + */ + data[0] = (reg << 1) | ((value >> 8) & 0x0001); + data[1] = value & 0x00ff; + + wm8960_write_reg_cache(codec, reg, value); + if (codec->hw_write(codec->control_data, data, 2) == 2) + return 0; + else + return -EIO; +} + +#define wm8960_reset(c) wm8960_write(c, WM8960_RESET, 0) + +/* enumerated controls */ +static const char *wm8960_deemph[] = {"None", "32Khz", "44.1Khz", "48Khz"}; +static const char *wm8960_polarity[] = {"No Inversion", "Left Inverted", + "Right Inverted", "Stereo Inversion"}; +static const char *wm8960_3d_upper_cutoff[] = {"High", "Low"}; +static const char *wm8960_3d_lower_cutoff[] = {"Low", "High"}; +static const char *wm8960_alcfunc[] = {"Off", "Right", "Left", "Stereo"}; +static const char *wm8960_alcmode[] = {"ALC", "Limiter"}; + +static const struct soc_enum wm8960_enum[] = { + SOC_ENUM_SINGLE(WM8960_DACCTL1, 1, 4, wm8960_deemph), + SOC_ENUM_SINGLE(WM8960_DACCTL1, 5, 4, wm8960_polarity), + SOC_ENUM_SINGLE(WM8960_DACCTL2, 5, 4, wm8960_polarity), + SOC_ENUM_SINGLE(WM8960_3D, 6, 2, wm8960_3d_upper_cutoff), + SOC_ENUM_SINGLE(WM8960_3D, 5, 2, wm8960_3d_lower_cutoff), + SOC_ENUM_SINGLE(WM8960_ALC1, 7, 4, wm8960_alcfunc), + SOC_ENUM_SINGLE(WM8960_ALC3, 8, 2, wm8960_alcmode), +}; + +static const DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 50, 0); +static const DECLARE_TLV_DB_SCALE(dac_tlv, -12700, 50, 1); +static const DECLARE_TLV_DB_SCALE(bypass_tlv, -2100, 300, 0); +static const DECLARE_TLV_DB_SCALE(out_tlv, -12100, 100, 1); + +static const struct snd_kcontrol_new wm8960_snd_controls[] = { +SOC_DOUBLE_R_TLV("Capture Volume", WM8960_LINVOL, WM8960_RINVOL, + 0, 63, 0, adc_tlv), +SOC_DOUBLE_R("Capture Volume ZC Switch", WM8960_LINVOL, WM8960_RINVOL, + 6, 1, 0), +SOC_DOUBLE_R("Capture Switch", WM8960_LINVOL, WM8960_RINVOL, + 7, 1, 0), + +SOC_DOUBLE_R_TLV("Playback Volume", WM8960_LDAC, WM8960_RDAC, + 0, 255, 0, dac_tlv), + +SOC_DOUBLE_R_TLV("Headphone Playback Volume", WM8960_LOUT1, WM8960_ROUT1, + 0, 127, 0, out_tlv), +SOC_DOUBLE_R("Headphone Playback ZC Switch", WM8960_LOUT1, WM8960_ROUT1, + 7, 1, 0), + +SOC_DOUBLE_R_TLV("Speaker Playback Volume", WM8960_LOUT2, WM8960_ROUT2, + 0, 127, 0, out_tlv), +SOC_DOUBLE_R("Speaker Playback ZC Switch", WM8960_LOUT2, WM8960_ROUT2, + 7, 1, 0), +SOC_SINGLE("Speaker DC Volume", WM8960_CLASSD3, 3, 5, 0), +SOC_SINGLE("Speaker AC Volume", WM8960_CLASSD3, 0, 5, 0), + +SOC_SINGLE("PCM Playback -6dB Switch", WM8960_DACCTL1, 7, 1, 0), +SOC_ENUM("ADC Polarity", wm8960_enum[1]), +SOC_ENUM("Playback De-emphasis", wm8960_enum[0]), +SOC_SINGLE("ADC High Pass Filter Switch", WM8960_DACCTL1, 0, 1, 0), + +SOC_ENUM("DAC Polarity", wm8960_enum[2]), + +SOC_ENUM("3D Filter Upper Cut-Off", wm8960_enum[3]), +SOC_ENUM("3D Filter Lower Cut-Off", wm8960_enum[4]), +SOC_SINGLE("3D Volume", WM8960_3D, 1, 15, 0), +SOC_SINGLE("3D Switch", WM8960_3D, 0, 1, 0), + +SOC_ENUM("ALC Function", wm8960_enum[5]), +SOC_SINGLE("ALC Max Gain", WM8960_ALC1, 4, 7, 0), +SOC_SINGLE("ALC Target", WM8960_ALC1, 0, 15, 1), +SOC_SINGLE("ALC Min Gain", WM8960_ALC2, 4, 7, 0), +SOC_SINGLE("ALC Hold Time", WM8960_ALC2, 0, 15, 0), +SOC_ENUM("ALC Mode", wm8960_enum[6]), +SOC_SINGLE("ALC Decay", WM8960_ALC3, 4, 15, 0), +SOC_SINGLE("ALC Attack", WM8960_ALC3, 0, 15, 0), + +SOC_SINGLE("Noise Gate Threshold", WM8960_NOISEG, 3, 31, 0), +SOC_SINGLE("Noise Gate Switch", WM8960_NOISEG, 0, 1, 0), + +SOC_DOUBLE_R("ADC PCM Capture Volume", WM8960_LINPATH, WM8960_RINPATH, + 0, 127, 0), + +SOC_SINGLE_TLV("Left Output Mixer Boost Bypass Volume", + WM8960_BYPASS1, 4, 7, 1, bypass_tlv), +SOC_SINGLE_TLV("Left Output Mixer LINPUT3 Volume", + WM8960_LOUTMIX, 4, 7, 1, bypass_tlv), +SOC_SINGLE_TLV("Right Output Mixer Boost Bypass Volume", + WM8960_BYPASS2, 4, 7, 1, bypass_tlv), +SOC_SINGLE_TLV("Right Output Mixer RINPUT3 Volume", + WM8960_ROUTMIX, 4, 7, 1, bypass_tlv), +}; + +static const struct snd_kcontrol_new wm8960_lin_boost[] = { +SOC_DAPM_SINGLE("LINPUT2 Switch", WM8960_LINPATH, 6, 1, 0), +SOC_DAPM_SINGLE("LINPUT3 Switch", WM8960_LINPATH, 7, 1, 0), +SOC_DAPM_SINGLE("LINPUT1 Switch", WM8960_LINPATH, 8, 1, 0), +}; + +static const struct snd_kcontrol_new wm8960_lin[] = { +SOC_DAPM_SINGLE("Boost Switch", WM8960_LINPATH, 3, 1, 0), +}; + +static const struct snd_kcontrol_new wm8960_rin_boost[] = { +SOC_DAPM_SINGLE("RINPUT2 Switch", WM8960_RINPATH, 6, 1, 0), +SOC_DAPM_SINGLE("RINPUT3 Switch", WM8960_RINPATH, 7, 1, 0), +SOC_DAPM_SINGLE("RINPUT1 Switch", WM8960_RINPATH, 8, 1, 0), +}; + +static const struct snd_kcontrol_new wm8960_rin[] = { +SOC_DAPM_SINGLE("Boost Switch", WM8960_RINPATH, 3, 1, 0), +}; + +static const struct snd_kcontrol_new wm8960_loutput_mixer[] = { +SOC_DAPM_SINGLE("PCM Playback Switch", WM8960_LOUTMIX, 8, 1, 0), +SOC_DAPM_SINGLE("LINPUT3 Switch", WM8960_LOUTMIX, 7, 1, 0), +SOC_DAPM_SINGLE("Boost Bypass Switch", WM8960_BYPASS1, 7, 1, 0), +}; + +static const struct snd_kcontrol_new wm8960_routput_mixer[] = { +SOC_DAPM_SINGLE("PCM Playback Switch", WM8960_ROUTMIX, 8, 1, 0), +SOC_DAPM_SINGLE("RINPUT3 Switch", WM8960_ROUTMIX, 7, 1, 0), +SOC_DAPM_SINGLE("Boost Bypass Switch", WM8960_BYPASS2, 7, 1, 0), +}; + +static const struct snd_kcontrol_new wm8960_mono_out[] = { +SOC_DAPM_SINGLE("Left Switch", WM8960_MONOMIX1, 7, 1, 0), +SOC_DAPM_SINGLE("Right Switch", WM8960_MONOMIX2, 7, 1, 0), +}; + +static const struct snd_soc_dapm_widget wm8960_dapm_widgets[] = { +SND_SOC_DAPM_INPUT("LINPUT1"), +SND_SOC_DAPM_INPUT("RINPUT1"), +SND_SOC_DAPM_INPUT("LINPUT2"), +SND_SOC_DAPM_INPUT("RINPUT2"), +SND_SOC_DAPM_INPUT("LINPUT3"), +SND_SOC_DAPM_INPUT("RINPUT3"), + +SND_SOC_DAPM_MICBIAS("MICB", WM8960_POWER1, 1, 0), + +SND_SOC_DAPM_MIXER("Left Boost Mixer", WM8960_POWER1, 5, 0, + wm8960_lin_boost, ARRAY_SIZE(wm8960_lin_boost)), +SND_SOC_DAPM_MIXER("Right Boost Mixer", WM8960_POWER1, 4, 0, + wm8960_rin_boost, ARRAY_SIZE(wm8960_rin_boost)), + +SND_SOC_DAPM_MIXER("Left Input Mixer", WM8960_POWER3, 5, 0, + wm8960_lin, ARRAY_SIZE(wm8960_lin)), +SND_SOC_DAPM_MIXER("Right Input Mixer", WM8960_POWER3, 4, 0, + wm8960_rin, ARRAY_SIZE(wm8960_rin)), + +SND_SOC_DAPM_ADC("Left ADC", "Capture", WM8960_POWER2, 3, 0), +SND_SOC_DAPM_ADC("Right ADC", "Capture", WM8960_POWER2, 2, 0), + +SND_SOC_DAPM_DAC("Left DAC", "Playback", WM8960_POWER2, 8, 0), +SND_SOC_DAPM_DAC("Right DAC", "Playback", WM8960_POWER2, 7, 0), + +SND_SOC_DAPM_MIXER("Left Output Mixer", WM8960_POWER3, 3, 0, + &wm8960_loutput_mixer[0], + ARRAY_SIZE(wm8960_loutput_mixer)), +SND_SOC_DAPM_MIXER("Right Output Mixer", WM8960_POWER3, 2, 0, + &wm8960_routput_mixer[0], + ARRAY_SIZE(wm8960_routput_mixer)), + +SND_SOC_DAPM_MIXER("Mono Output Mixer", WM8960_POWER2, 1, 0, + &wm8960_mono_out[0], + ARRAY_SIZE(wm8960_mono_out)), + +SND_SOC_DAPM_PGA("LOUT1 PGA", WM8960_POWER2, 6, 0, NULL, 0), +SND_SOC_DAPM_PGA("ROUT1 PGA", WM8960_POWER2, 5, 0, NULL, 0), + +SND_SOC_DAPM_PGA("Left Speaker PGA", WM8960_POWER2, 4, 0, NULL, 0), +SND_SOC_DAPM_PGA("Right Speaker PGA", WM8960_POWER2, 3, 0, NULL, 0), + +SND_SOC_DAPM_PGA("Right Speaker Output", WM8960_CLASSD1, 7, 0, NULL, 0), +SND_SOC_DAPM_PGA("Left Speaker Output", WM8960_CLASSD1, 6, 0, NULL, 0), + +SND_SOC_DAPM_OUTPUT("SPK_LP"), +SND_SOC_DAPM_OUTPUT("SPK_LN"), +SND_SOC_DAPM_OUTPUT("HP_L"), +SND_SOC_DAPM_OUTPUT("HP_R"), +SND_SOC_DAPM_OUTPUT("SPK_RP"), +SND_SOC_DAPM_OUTPUT("SPK_RN"), +SND_SOC_DAPM_OUTPUT("OUT3"), +}; + +static const struct snd_soc_dapm_route audio_paths[] = { + { "Left Boost Mixer", "LINPUT1 Switch", "LINPUT1" }, + { "Left Boost Mixer", "LINPUT2 Switch", "LINPUT2" }, + { "Left Boost Mixer", "LINPUT3 Switch", "LINPUT3" }, + + { "Left Input Mixer", "Boost Switch", "Left Boost Mixer", }, + { "Left Input Mixer", NULL, "LINPUT1", }, /* Really Boost Switch */ + { "Left Input Mixer", NULL, "LINPUT2" }, + { "Left Input Mixer", NULL, "LINPUT3" }, + + { "Right Boost Mixer", "RINPUT1 Switch", "RINPUT1" }, + { "Right Boost Mixer", "RINPUT2 Switch", "RINPUT2" }, + { "Right Boost Mixer", "RINPUT3 Switch", "RINPUT3" }, + + { "Right Input Mixer", "Boost Switch", "Right Boost Mixer", }, + { "Right Input Mixer", NULL, "RINPUT1", }, /* Really Boost Switch */ + { "Right Input Mixer", NULL, "RINPUT2" }, + { "Right Input Mixer", NULL, "LINPUT3" }, + + { "Left ADC", NULL, "Left Input Mixer" }, + { "Right ADC", NULL, "Right Input Mixer" }, + + { "Left Output Mixer", "LINPUT3 Switch", "LINPUT3" }, + { "Left Output Mixer", "Boost Bypass Switch", "Left Boost Mixer"} , + { "Left Output Mixer", "PCM Playback Switch", "Left DAC" }, + + { "Right Output Mixer", "RINPUT3 Switch", "RINPUT3" }, + { "Right Output Mixer", "Boost Bypass Switch", "Right Boost Mixer" } , + { "Right Output Mixer", "PCM Playback Switch", "Right DAC" }, + + { "Mono Output Mixer", "Left Switch", "Left Output Mixer" }, + { "Mono Output Mixer", "Right Switch", "Right Output Mixer" }, + + { "LOUT1 PGA", NULL, "Left Output Mixer" }, + { "ROUT1 PGA", NULL, "Right Output Mixer" }, + + { "HP_L", NULL, "LOUT1 PGA" }, + { "HP_R", NULL, "ROUT1 PGA" }, + + { "Left Speaker PGA", NULL, "Left Output Mixer" }, + { "Right Speaker PGA", NULL, "Right Output Mixer" }, + + { "Left Speaker Output", NULL, "Left Speaker PGA" }, + { "Right Speaker Output", NULL, "Right Speaker PGA" }, + + { "SPK_LN", NULL, "Left Speaker Output" }, + { "SPK_LP", NULL, "Left Speaker Output" }, + { "SPK_RN", NULL, "Right Speaker Output" }, + { "SPK_RP", NULL, "Right Speaker Output" }, + + { "OUT3", NULL, "Mono Output Mixer", } +}; + +static int wm8960_add_widgets(struct snd_soc_codec *codec) +{ + snd_soc_dapm_new_controls(codec, wm8960_dapm_widgets, + ARRAY_SIZE(wm8960_dapm_widgets)); + + snd_soc_dapm_add_routes(codec, audio_paths, ARRAY_SIZE(audio_paths)); + + snd_soc_dapm_new_widgets(codec); + return 0; +} + +static int wm8960_set_dai_fmt(struct snd_soc_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 iface = 0; + + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + iface |= 0x0040; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } + + /* interface format */ + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + iface |= 0x0002; + break; + case SND_SOC_DAIFMT_RIGHT_J: + break; + case SND_SOC_DAIFMT_LEFT_J: + iface |= 0x0001; + break; + case SND_SOC_DAIFMT_DSP_A: + iface |= 0x0003; + break; + case SND_SOC_DAIFMT_DSP_B: + iface |= 0x0013; + break; + default: + return -EINVAL; + } + + /* clock inversion */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_IF: + iface |= 0x0090; + break; + case SND_SOC_DAIFMT_IB_NF: + iface |= 0x0080; + break; + case SND_SOC_DAIFMT_NB_IF: + iface |= 0x0010; + break; + default: + return -EINVAL; + } + + /* set iface */ + wm8960_write(codec, WM8960_IFACE1, iface); + return 0; +} + +static int wm8960_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + u16 iface = wm8960_read(codec, WM8960_IFACE1) & 0xfff3; + + /* bit size */ + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + break; + case SNDRV_PCM_FORMAT_S20_3LE: + iface |= 0x0004; + break; + case SNDRV_PCM_FORMAT_S24_LE: + iface |= 0x0008; + break; + } + + /* set iface */ + wm8960_write(codec, WM8960_IFACE1, iface); + return 0; +} + +static int wm8960_mute(struct snd_soc_dai *dai, int mute) +{ + struct snd_soc_codec *codec = dai->codec; + u16 mute_reg = wm8960_read(codec, WM8960_DACCTL1) & 0xfff7; + + if (mute) + wm8960_write(codec, WM8960_DACCTL1, mute_reg | 0x8); + else + wm8960_write(codec, WM8960_DACCTL1, mute_reg); + return 0; +} + +static int wm8960_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + struct wm8960_data *pdata = codec->dev->platform_data; + u16 reg; + + switch (level) { + case SND_SOC_BIAS_ON: + break; + + case SND_SOC_BIAS_PREPARE: + /* Set VMID to 2x50k */ + reg = wm8960_read(codec, WM8960_POWER1); + reg &= ~0x180; + reg |= 0x80; + wm8960_write(codec, WM8960_POWER1, reg); + break; + + case SND_SOC_BIAS_STANDBY: + if (codec->bias_level == SND_SOC_BIAS_OFF) { + /* Enable anti-pop features */ + wm8960_write(codec, WM8960_APOP1, + WM8960_POBCTRL | WM8960_SOFT_ST | + WM8960_BUFDCOPEN | WM8960_BUFIOEN); + + /* Discharge HP output */ + reg = WM8960_DISOP; + if (pdata) + reg |= pdata->dres << 4; + wm8960_write(codec, WM8960_APOP2, reg); + + msleep(400); + + wm8960_write(codec, WM8960_APOP2, 0); + + /* Enable & ramp VMID at 2x50k */ + reg = wm8960_read(codec, WM8960_POWER1); + reg |= 0x80; + wm8960_write(codec, WM8960_POWER1, reg); + msleep(100); + + /* Enable VREF */ + wm8960_write(codec, WM8960_POWER1, reg | WM8960_VREF); + + /* Disable anti-pop features */ + wm8960_write(codec, WM8960_APOP1, WM8960_BUFIOEN); + } + + /* Set VMID to 2x250k */ + reg = wm8960_read(codec, WM8960_POWER1); + reg &= ~0x180; + reg |= 0x100; + wm8960_write(codec, WM8960_POWER1, reg); + break; + + case SND_SOC_BIAS_OFF: + /* Enable anti-pop features */ + wm8960_write(codec, WM8960_APOP1, + WM8960_POBCTRL | WM8960_SOFT_ST | + WM8960_BUFDCOPEN | WM8960_BUFIOEN); + + /* Disable VMID and VREF, let them discharge */ + wm8960_write(codec, WM8960_POWER1, 0); + msleep(600); + + wm8960_write(codec, WM8960_APOP1, 0); + break; + } + + codec->bias_level = level; + + return 0; +} + +/* PLL divisors */ +struct _pll_div { + u32 pre_div:1; + u32 n:4; + u32 k:24; +}; + +/* The size in bits of the pll divide multiplied by 10 + * to allow rounding later */ +#define FIXED_PLL_SIZE ((1 << 24) * 10) + +static int pll_factors(unsigned int source, unsigned int target, + struct _pll_div *pll_div) +{ + unsigned long long Kpart; + unsigned int K, Ndiv, Nmod; + + pr_debug("WM8960 PLL: setting %dHz->%dHz\n", source, target); + + /* Scale up target to PLL operating frequency */ + target *= 4; + + Ndiv = target / source; + if (Ndiv < 6) { + source >>= 1; + pll_div->pre_div = 1; + Ndiv = target / source; + } else + pll_div->pre_div = 0; + + if ((Ndiv < 6) || (Ndiv > 12)) { + pr_err("WM8960 PLL: Unsupported N=%d\n", Ndiv); + return -EINVAL; + } + + pll_div->n = Ndiv; + Nmod = target % source; + Kpart = FIXED_PLL_SIZE * (long long)Nmod; + + do_div(Kpart, source); + + K = Kpart & 0xFFFFFFFF; + + /* Check if we need to round */ + if ((K % 10) >= 5) + K += 5; + + /* Move down to proper range now rounding is done */ + K /= 10; + + pll_div->k = K; + + pr_debug("WM8960 PLL: N=%x K=%x pre_div=%d\n", + pll_div->n, pll_div->k, pll_div->pre_div); + + return 0; +} + +static int wm8960_set_dai_pll(struct snd_soc_dai *codec_dai, + int pll_id, unsigned int freq_in, unsigned int freq_out) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 reg; + static struct _pll_div pll_div; + int ret; + + if (freq_in && freq_out) { + ret = pll_factors(freq_in, freq_out, &pll_div); + if (ret != 0) + return ret; + } + + /* Disable the PLL: even if we are changing the frequency the + * PLL needs to be disabled while we do so. */ + wm8960_write(codec, WM8960_CLOCK1, + wm8960_read(codec, WM8960_CLOCK1) & ~1); + wm8960_write(codec, WM8960_POWER2, + wm8960_read(codec, WM8960_POWER2) & ~1); + + if (!freq_in || !freq_out) + return 0; + + reg = wm8960_read(codec, WM8960_PLL1) & ~0x3f; + reg |= pll_div.pre_div << 4; + reg |= pll_div.n; + + if (pll_div.k) { + reg |= 0x20; + + wm8960_write(codec, WM8960_PLL2, (pll_div.k >> 18) & 0x3f); + wm8960_write(codec, WM8960_PLL3, (pll_div.k >> 9) & 0x1ff); + wm8960_write(codec, WM8960_PLL4, pll_div.k & 0x1ff); + } + wm8960_write(codec, WM8960_PLL1, reg); + + /* Turn it on */ + wm8960_write(codec, WM8960_POWER2, + wm8960_read(codec, WM8960_POWER2) | 1); + msleep(250); + wm8960_write(codec, WM8960_CLOCK1, + wm8960_read(codec, WM8960_CLOCK1) | 1); + + return 0; +} + +static int wm8960_set_dai_clkdiv(struct snd_soc_dai *codec_dai, + int div_id, int div) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 reg; + + switch (div_id) { + case WM8960_SYSCLKSEL: + reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1fe; + wm8960_write(codec, WM8960_CLOCK1, reg | div); + break; + case WM8960_SYSCLKDIV: + reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1f9; + wm8960_write(codec, WM8960_CLOCK1, reg | div); + break; + case WM8960_DACDIV: + reg = wm8960_read(codec, WM8960_CLOCK1) & 0x1c7; + wm8960_write(codec, WM8960_CLOCK1, reg | div); + break; + case WM8960_OPCLKDIV: + reg = wm8960_read(codec, WM8960_PLL1) & 0x03f; + wm8960_write(codec, WM8960_PLL1, reg | div); + break; + case WM8960_DCLKDIV: + reg = wm8960_read(codec, WM8960_CLOCK2) & 0x03f; + wm8960_write(codec, WM8960_CLOCK2, reg | div); + break; + case WM8960_TOCLKSEL: + reg = wm8960_read(codec, WM8960_ADDCTL1) & 0x1fd; + wm8960_write(codec, WM8960_ADDCTL1, reg | div); + break; + default: + return -EINVAL; + } + + return 0; +} + +#define WM8960_RATES SNDRV_PCM_RATE_8000_48000 + +#define WM8960_FORMATS \ + (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S20_3LE | \ + SNDRV_PCM_FMTBIT_S24_LE) + +static struct snd_soc_dai_ops wm8960_dai_ops = { + .hw_params = wm8960_hw_params, + .digital_mute = wm8960_mute, + .set_fmt = wm8960_set_dai_fmt, + .set_clkdiv = wm8960_set_dai_clkdiv, + .set_pll = wm8960_set_dai_pll, +}; + +struct snd_soc_dai wm8960_dai = { + .name = "WM8960", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = WM8960_RATES, + .formats = WM8960_FORMATS,}, + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = WM8960_RATES, + .formats = WM8960_FORMATS,}, + .ops = &wm8960_dai_ops, + .symmetric_rates = 1, +}; +EXPORT_SYMBOL_GPL(wm8960_dai); + +static int wm8960_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + + wm8960_set_bias_level(codec, SND_SOC_BIAS_OFF); + return 0; +} + +static int wm8960_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + int i; + u8 data[2]; + u16 *cache = codec->reg_cache; + + /* Sync reg_cache with the hardware */ + for (i = 0; i < ARRAY_SIZE(wm8960_reg); i++) { + data[0] = (i << 1) | ((cache[i] >> 8) & 0x0001); + data[1] = cache[i] & 0x00ff; + codec->hw_write(codec->control_data, data, 2); + } + + wm8960_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + wm8960_set_bias_level(codec, codec->suspend_bias_level); + return 0; +} + +static struct snd_soc_codec *wm8960_codec; + +static int wm8960_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec; + int ret = 0; + + if (wm8960_codec == NULL) { + dev_err(&pdev->dev, "Codec device not registered\n"); + return -ENODEV; + } + + socdev->card->codec = wm8960_codec; + codec = wm8960_codec; + + /* register pcms */ + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + dev_err(codec->dev, "failed to create pcms: %d\n", ret); + goto pcm_err; + } + + snd_soc_add_controls(codec, wm8960_snd_controls, + ARRAY_SIZE(wm8960_snd_controls)); + wm8960_add_widgets(codec); + ret = snd_soc_init_card(socdev); + if (ret < 0) { + dev_err(codec->dev, "failed to register card: %d\n", ret); + goto card_err; + } + + return ret; + +card_err: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +pcm_err: + return ret; +} + +/* power down chip */ +static int wm8960_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); + + return 0; +} + +struct snd_soc_codec_device soc_codec_dev_wm8960 = { + .probe = wm8960_probe, + .remove = wm8960_remove, + .suspend = wm8960_suspend, + .resume = wm8960_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_wm8960); + +static int wm8960_register(struct wm8960_priv *wm8960) +{ + struct wm8960_data *pdata = wm8960->codec.dev->platform_data; + struct snd_soc_codec *codec = &wm8960->codec; + int ret; + u16 reg; + + if (wm8960_codec) { + dev_err(codec->dev, "Another WM8960 is registered\n"); + return -EINVAL; + } + + if (!pdata) { + dev_warn(codec->dev, "No platform data supplied\n"); + } else { + if (pdata->dres > WM8960_DRES_MAX) { + dev_err(codec->dev, "Invalid DRES: %d\n", pdata->dres); + pdata->dres = 0; + } + } + + mutex_init(&codec->mutex); + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + + codec->private_data = wm8960; + codec->name = "WM8960"; + codec->owner = THIS_MODULE; + codec->read = wm8960_read_reg_cache; + codec->write = wm8960_write; + codec->bias_level = SND_SOC_BIAS_OFF; + codec->set_bias_level = wm8960_set_bias_level; + codec->dai = &wm8960_dai; + codec->num_dai = 1; + codec->reg_cache_size = WM8960_CACHEREGNUM; + codec->reg_cache = &wm8960->reg_cache; + + memcpy(codec->reg_cache, wm8960_reg, sizeof(wm8960_reg)); + + ret = wm8960_reset(codec); + if (ret < 0) { + dev_err(codec->dev, "Failed to issue reset\n"); + return ret; + } + + wm8960_dai.dev = codec->dev; + + wm8960_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + /* Latch the update bits */ + reg = wm8960_read(codec, WM8960_LINVOL); + wm8960_write(codec, WM8960_LINVOL, reg | 0x100); + reg = wm8960_read(codec, WM8960_RINVOL); + wm8960_write(codec, WM8960_RINVOL, reg | 0x100); + reg = wm8960_read(codec, WM8960_LADC); + wm8960_write(codec, WM8960_LADC, reg | 0x100); + reg = wm8960_read(codec, WM8960_RADC); + wm8960_write(codec, WM8960_RADC, reg | 0x100); + reg = wm8960_read(codec, WM8960_LDAC); + wm8960_write(codec, WM8960_LDAC, reg | 0x100); + reg = wm8960_read(codec, WM8960_RDAC); + wm8960_write(codec, WM8960_RDAC, reg | 0x100); + reg = wm8960_read(codec, WM8960_LOUT1); + wm8960_write(codec, WM8960_LOUT1, reg | 0x100); + reg = wm8960_read(codec, WM8960_ROUT1); + wm8960_write(codec, WM8960_ROUT1, reg | 0x100); + reg = wm8960_read(codec, WM8960_LOUT2); + wm8960_write(codec, WM8960_LOUT2, reg | 0x100); + reg = wm8960_read(codec, WM8960_ROUT2); + wm8960_write(codec, WM8960_ROUT2, reg | 0x100); + + wm8960_codec = codec; + + ret = snd_soc_register_codec(codec); + if (ret != 0) { + dev_err(codec->dev, "Failed to register codec: %d\n", ret); + return ret; + } + + ret = snd_soc_register_dai(&wm8960_dai); + if (ret != 0) { + dev_err(codec->dev, "Failed to register DAI: %d\n", ret); + snd_soc_unregister_codec(codec); + return ret; + } + + return 0; +} + +static void wm8960_unregister(struct wm8960_priv *wm8960) +{ + wm8960_set_bias_level(&wm8960->codec, SND_SOC_BIAS_OFF); + snd_soc_unregister_dai(&wm8960_dai); + snd_soc_unregister_codec(&wm8960->codec); + kfree(wm8960); + wm8960_codec = NULL; +} + +static __devinit int wm8960_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct wm8960_priv *wm8960; + struct snd_soc_codec *codec; + + wm8960 = kzalloc(sizeof(struct wm8960_priv), GFP_KERNEL); + if (wm8960 == NULL) + return -ENOMEM; + + codec = &wm8960->codec; + codec->hw_write = (hw_write_t)i2c_master_send; + + i2c_set_clientdata(i2c, wm8960); + codec->control_data = i2c; + + codec->dev = &i2c->dev; + + return wm8960_register(wm8960); +} + +static __devexit int wm8960_i2c_remove(struct i2c_client *client) +{ + struct wm8960_priv *wm8960 = i2c_get_clientdata(client); + wm8960_unregister(wm8960); + return 0; +} + +static const struct i2c_device_id wm8960_i2c_id[] = { + { "wm8960", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wm8960_i2c_id); + +static struct i2c_driver wm8960_i2c_driver = { + .driver = { + .name = "WM8960 I2C Codec", + .owner = THIS_MODULE, + }, + .probe = wm8960_i2c_probe, + .remove = __devexit_p(wm8960_i2c_remove), + .id_table = wm8960_i2c_id, +}; + +static int __init wm8960_modinit(void) +{ + int ret; + + ret = i2c_add_driver(&wm8960_i2c_driver); + if (ret != 0) { + printk(KERN_ERR "Failed to register WM8960 I2C driver: %d\n", + ret); + } + + return ret; +} +module_init(wm8960_modinit); + +static void __exit wm8960_exit(void) +{ + i2c_del_driver(&wm8960_i2c_driver); +} +module_exit(wm8960_exit); + + +MODULE_DESCRIPTION("ASoC WM8960 driver"); +MODULE_AUTHOR("Liam Girdwood"); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/wm8960.h b/sound/soc/codecs/wm8960.h new file mode 100644 index 0000000..c9af56c --- /dev/null +++ b/sound/soc/codecs/wm8960.h @@ -0,0 +1,127 @@ +/* + * wm8960.h -- WM8960 Soc Audio driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _WM8960_H +#define _WM8960_H + +/* WM8960 register space */ + + +#define WM8960_CACHEREGNUM 56 + +#define WM8960_LINVOL 0x0 +#define WM8960_RINVOL 0x1 +#define WM8960_LOUT1 0x2 +#define WM8960_ROUT1 0x3 +#define WM8960_CLOCK1 0x4 +#define WM8960_DACCTL1 0x5 +#define WM8960_DACCTL2 0x6 +#define WM8960_IFACE1 0x7 +#define WM8960_CLOCK2 0x8 +#define WM8960_IFACE2 0x9 +#define WM8960_LDAC 0xa +#define WM8960_RDAC 0xb + +#define WM8960_RESET 0xf +#define WM8960_3D 0x10 +#define WM8960_ALC1 0x11 +#define WM8960_ALC2 0x12 +#define WM8960_ALC3 0x13 +#define WM8960_NOISEG 0x14 +#define WM8960_LADC 0x15 +#define WM8960_RADC 0x16 +#define WM8960_ADDCTL1 0x17 +#define WM8960_ADDCTL2 0x18 +#define WM8960_POWER1 0x19 +#define WM8960_POWER2 0x1a +#define WM8960_ADDCTL3 0x1b +#define WM8960_APOP1 0x1c +#define WM8960_APOP2 0x1d + +#define WM8960_LINPATH 0x20 +#define WM8960_RINPATH 0x21 +#define WM8960_LOUTMIX 0x22 + +#define WM8960_ROUTMIX 0x25 +#define WM8960_MONOMIX1 0x26 +#define WM8960_MONOMIX2 0x27 +#define WM8960_LOUT2 0x28 +#define WM8960_ROUT2 0x29 +#define WM8960_MONO 0x2a +#define WM8960_INBMIX1 0x2b +#define WM8960_INBMIX2 0x2c +#define WM8960_BYPASS1 0x2d +#define WM8960_BYPASS2 0x2e +#define WM8960_POWER3 0x2f +#define WM8960_ADDCTL4 0x30 +#define WM8960_CLASSD1 0x31 + +#define WM8960_CLASSD3 0x33 +#define WM8960_PLL1 0x34 +#define WM8960_PLL2 0x35 +#define WM8960_PLL3 0x36 +#define WM8960_PLL4 0x37 + + +/* + * WM8960 Clock dividers + */ +#define WM8960_SYSCLKDIV 0 +#define WM8960_DACDIV 1 +#define WM8960_OPCLKDIV 2 +#define WM8960_DCLKDIV 3 +#define WM8960_TOCLKSEL 4 +#define WM8960_SYSCLKSEL 5 + +#define WM8960_SYSCLK_DIV_1 (0 << 1) +#define WM8960_SYSCLK_DIV_2 (2 << 1) + +#define WM8960_SYSCLK_MCLK (0 << 0) +#define WM8960_SYSCLK_PLL (1 << 0) + +#define WM8960_DAC_DIV_1 (0 << 3) +#define WM8960_DAC_DIV_1_5 (1 << 3) +#define WM8960_DAC_DIV_2 (2 << 3) +#define WM8960_DAC_DIV_3 (3 << 3) +#define WM8960_DAC_DIV_4 (4 << 3) +#define WM8960_DAC_DIV_5_5 (5 << 3) +#define WM8960_DAC_DIV_6 (6 << 3) + +#define WM8960_DCLK_DIV_1_5 (0 << 6) +#define WM8960_DCLK_DIV_2 (1 << 6) +#define WM8960_DCLK_DIV_3 (2 << 6) +#define WM8960_DCLK_DIV_4 (3 << 6) +#define WM8960_DCLK_DIV_6 (4 << 6) +#define WM8960_DCLK_DIV_8 (5 << 6) +#define WM8960_DCLK_DIV_12 (6 << 6) +#define WM8960_DCLK_DIV_16 (7 << 6) + +#define WM8960_TOCLK_F19 (0 << 1) +#define WM8960_TOCLK_F21 (1 << 1) + +#define WM8960_OPCLK_DIV_1 (0 << 0) +#define WM8960_OPCLK_DIV_2 (1 << 0) +#define WM8960_OPCLK_DIV_3 (2 << 0) +#define WM8960_OPCLK_DIV_4 (3 << 0) +#define WM8960_OPCLK_DIV_5_5 (4 << 0) +#define WM8960_OPCLK_DIV_6 (5 << 0) + +extern struct snd_soc_dai wm8960_dai; +extern struct snd_soc_codec_device soc_codec_dev_wm8960; + +#define WM8960_DRES_400R 0 +#define WM8960_DRES_200R 1 +#define WM8960_DRES_600R 2 +#define WM8960_DRES_150R 3 +#define WM8960_DRES_MAX 3 + +struct wm8960_data { + int dres; +}; + +#endif -- cgit v0.10.2 From 0f3fd87ce43727d6b8573191ce89e874533b1429 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Mon, 13 Apr 2009 20:24:50 +0100 Subject: perf_counter: fix alignment in /proc/interrupts Trivial fix on columns alignment in /proc/interrupts file. Signed-off-by: Luis Henriques Cc: Peter Zijlstra LKML-Reference: <20090413192449.GA3920@hades.domain.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index dccaaa8..849cfab 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -67,7 +67,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_printf(p, " Performance counter interrupts\n"); - seq_printf(p, "PND: "); + seq_printf(p, "%*s: ", prec, "PND"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); seq_printf(p, " Performance pending work\n"); -- cgit v0.10.2 From 5cda395f4a262788d8ed79ac8a26a2b821e5f751 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 13 Apr 2009 17:39:24 +0200 Subject: x86: fix function definitions after: x86: apic - introduce imcr_ helpers The patch "introduce imcr_ helpers" introduced good comments, but also a few new compile warnings. This fixes the function definitions to have a 'void' return type. Signed-off-by: Alexander van Heukelum Acked-by: Cyrill Gorcunov LKML-Reference: <20090413153924.GA20287@mailshack.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 9b849d4..4b48ff9 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -106,7 +106,7 @@ static int enabled_via_apicbase; * the BIOS or the operating system must switch out of * PIC Mode by changing the IMCR. */ -static inline imcr_pic_to_apic(void) +static inline void imcr_pic_to_apic(void) { /* select IMCR register */ outb(0x70, 0x22); @@ -114,7 +114,7 @@ static inline imcr_pic_to_apic(void) outb(0x01, 0x23); } -static inline imcr_apic_to_pic(void) +static inline void imcr_apic_to_pic(void) { /* select IMCR register */ outb(0x70, 0x22); -- cgit v0.10.2 From e1112b4d96859367a93468027c9635e2ac04eb3f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 31 Mar 2009 00:48:49 -0500 Subject: tracing/filters: add run-time field descriptions to TRACE_EVENT_FORMAT events This patch adds run-time field descriptions to all the event formats exported using TRACE_EVENT_FORMAT. It also hooks up all the tracers that use them (i.e. the tracers in the 'ftrace subsystem') so they can also have their output filtered by the event-filtering mechanism. When I was testing this, there were a couple of things that fooled me into thinking the filters weren't working, when actually they were - I'll mention them here so others don't make the same mistakes (and file bug reports. ;-) One is that some of the tracers trace multiple events e.g. the sched_switch tracer uses the context_switch and wakeup events, and if you don't set filters on all of the traced events, the unfiltered output from the events without filters on them can make it look like the filtering as a whole isn't working properly, when actually it is doing what it was asked to do - it just wasn't asked to do the right thing. The other is that for the really high-volume tracers e.g. the function tracer, the volume of filtered events can be so high that it pushes the unfiltered events out of the ring buffer before they can be read so e.g. cat'ing the trace file repeatedly shows either no output, or once in awhile some output but that isn't there the next time you read the trace, which isn't what you normally expect when reading the trace file. If you read from the trace_pipe file though, you can catch them before they disappear. Changes from v1: As suggested by Frederic Weisbecker: - get rid of externs in functions - added unlikely() to filter_check_discard() Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 7a0aa0e..9419ad1 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, gfp_t gfp_flags, int node) { + struct ftrace_event_call *call = &event_kmem_alloc; struct trace_array *tr = kmemtrace_array; struct kmemtrace_alloc_entry *entry; struct ring_buffer_event *event; @@ -62,6 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); @@ -71,6 +74,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, unsigned long call_site, const void *ptr) { + struct ftrace_event_call *call = &event_kmem_free; struct trace_array *tr = kmemtrace_array; struct kmemtrace_free_entry *entry; struct ring_buffer_event *event; @@ -86,6 +90,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, entry->call_site = call_site; entry->ptr = ptr; + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4865459..962e617 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -898,6 +898,7 @@ trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_function; struct ring_buffer_event *event; struct ftrace_entry *entry; @@ -912,6 +913,9 @@ trace_function(struct trace_array *tr, entry = ring_buffer_event_data(event); entry->ip = ip; entry->parent_ip = parent_ip; + + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); } @@ -921,6 +925,7 @@ static int __trace_graph_entry(struct trace_array *tr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_funcgraph_entry; struct ring_buffer_event *event; struct ftrace_graph_ent_entry *entry; @@ -933,6 +938,7 @@ static int __trace_graph_entry(struct trace_array *tr, return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(global_trace.buffer, event); return 1; @@ -943,6 +949,7 @@ static void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_funcgraph_exit; struct ring_buffer_event *event; struct ftrace_graph_ret_entry *entry; @@ -955,6 +962,7 @@ static void __trace_graph_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); entry->ret = *trace; + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -973,6 +981,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, int skip, int pc) { #ifdef CONFIG_STACKTRACE + struct ftrace_event_call *call = &event_kernel_stack; struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; @@ -990,6 +999,7 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1015,6 +1025,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, unsigned long flags, int pc) { #ifdef CONFIG_STACKTRACE + struct ftrace_event_call *call = &event_user_stack; struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; @@ -1036,6 +1047,7 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1052,6 +1064,7 @@ ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { + struct ftrace_event_call *call = &event_special; struct ring_buffer_event *event; struct trace_array *tr = __tr; struct special_entry *entry; @@ -1064,6 +1077,7 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, pc); } @@ -1080,6 +1094,7 @@ tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *next, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_context_switch; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -1095,6 +1110,9 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); + + filter_check_discard(call, entry, event); + trace_buffer_unlock_commit(tr, event, flags, pc); } @@ -1104,6 +1122,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *curr, unsigned long flags, int pc) { + struct ftrace_event_call *call = &event_wakeup; struct ring_buffer_event *event; struct ctx_switch_entry *entry; @@ -1120,6 +1139,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); @@ -1221,6 +1242,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; static u32 trace_buf[TRACE_BUF_SIZE]; + struct ftrace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1260,6 +1282,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); out_unlock: @@ -1279,6 +1302,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; static char trace_buf[TRACE_BUF_SIZE]; + struct ftrace_event_call *call = &event_print; struct ring_buffer_event *event; struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1314,6 +1338,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; + filter_check_discard(call, entry, event); ring_buffer_unlock_commit(tr->buffer, event); out_unlock: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 34b94c3..e773728 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -866,6 +866,21 @@ extern void filter_free_subsystem_preds(struct event_subsystem *system); extern int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred); +static inline void +filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer_event *event) +{ + if (unlikely(call->preds) && !filter_match_preds(call, rec)) + ring_buffer_event_discard(event); +} + +#define __common_field(type, item) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item)); \ + if (ret) \ + return ret; + void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; @@ -897,4 +912,9 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ + extern struct ftrace_event_call event_##call; +#include "trace_event_types.h" + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index e6e3291..c95c25d 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -30,6 +30,7 @@ static struct trace_array *branch_tracer; static void probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) { + struct ftrace_event_call *call = &event_branch; struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; struct trace_branch *entry; @@ -73,6 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; + filter_check_discard(call, entry, event); + ring_buffer_unlock_commit(tr->buffer, event); out: diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index fd78bee..95b147a 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned int, line, line) - TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func) - TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) + TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, + TRACE_FUNC_SIZE+1, func) + TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, + TRACE_FUNC_SIZE+1, file) TRACE_FIELD(char, correct, correct) ), TP_RAW_FMT("%u:%s:%s (%u)") diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64ec4d2..be9299a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -680,6 +680,7 @@ static struct dentry * event_subsystem_dir(const char *name, struct dentry *d_events) { struct event_subsystem *system; + struct dentry *entry; /* First see if we did not already create this dir */ list_for_each_entry(system, &event_subsystems, list) { @@ -708,6 +709,12 @@ event_subsystem_dir(const char *name, struct dentry *d_events) system->preds = NULL; + entry = debugfs_create_file("filter", 0644, system->entry, system, + &ftrace_subsystem_filter_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/filter' entry\n", name); + return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 026be41..470ad94 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -185,7 +185,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system) } events_for_each(call) { - if (!call->name || !call->regfunc) + if (!call->define_fields) continue; if (!strcmp(call->system, system->name)) @@ -324,7 +324,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, events_for_each(call) { int err; - if (!call->name || !call->regfunc) + if (!call->define_fields) continue; if (strcmp(call->system, system->name)) diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 30743f7..1c94b87 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -146,13 +146,6 @@ ftrace_format_##call(struct trace_seq *s) \ if (ret) \ return ret; -#define __common_field(type, item) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ - if (ret) \ - return ret; - #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 07a22c3..f4e4661 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -30,7 +30,7 @@ #undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ +#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ @@ -85,18 +85,69 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_ENTRY entry #undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ +#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ cmd; #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int ftrace_define_fields_##call(void); \ +static int ftrace_raw_init_event_##call(void); \ \ -static struct ftrace_event_call __used \ +struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .id = proto, \ .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_raw_init_event_##call, \ .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ +}; \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + INIT_LIST_HEAD(&event_##call.fields); \ + return 0; \ +} \ + +#include "trace_event_types.h" + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_ZERO_CHAR +#define TRACE_FIELD_ZERO_CHAR(item) + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + struct args field; \ + int ret; \ + \ + __common_field(unsigned char, type); \ + __common_field(unsigned char, flags); \ + __common_field(unsigned char, preempt_count); \ + __common_field(int, pid); \ + __common_field(int, tgid); \ + \ + tstruct; \ + \ + return ret; \ } + #include "trace_event_types.h" diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 7bfdf4c..e6b275b 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -168,6 +168,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) void trace_hw_branch(u64 from, u64 to) { + struct ftrace_event_call *call = &event_hw_branch; struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; @@ -194,6 +195,7 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, 0); out: diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index bae791e..8ce7d7d 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type, static void probe_power_end(struct power_trace *it) { + struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; @@ -54,6 +55,7 @@ static void probe_power_end(struct power_trace *it) goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); @@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it) static void probe_power_mark(struct power_trace *it, unsigned int type, unsigned int level) { + struct ftrace_event_call *call = &event_power; struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; @@ -84,6 +87,7 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; + filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); -- cgit v0.10.2 From e45f2e2bd298e1ff687448e5fd15a3588b5807ec Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 31 Mar 2009 00:49:16 -0500 Subject: tracing/filters: add TRACE_EVENT_FORMAT_NOFILTER event macro Frederic Weisbecker suggested that the trace_special event shouldn't be filterable; this patch adds a TRACE_EVENT_FORMAT_NOFILTER event macro that allows an event format to be exported without having a filter attached, and removes filtering from the trace_special event. Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 962e617..c209d21 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1064,7 +1064,6 @@ ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { - struct ftrace_event_call *call = &event_special; struct ring_buffer_event *event; struct trace_array *tr = __tr; struct special_entry *entry; @@ -1077,7 +1076,6 @@ ftrace_trace_special(void *__tr, entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; - filter_check_discard(call, entry, event); trace_buffer_unlock_commit(tr, event, 0, pc); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e773728..3cf856f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -915,6 +915,8 @@ do { \ #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ extern struct ftrace_event_call event_##call; +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt) #include "trace_event_types.h" #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 95b147a..cfcecc4 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); -TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, +TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, arg1, arg1) TRACE_FIELD(unsigned long, arg2, arg2) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index f4e4661..77c494f 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -65,6 +65,22 @@ ftrace_format_##call(struct trace_seq *s) \ return ret; \ } +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct args field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + \ + return ret; \ +} + #include "trace_event_types.h" #undef TRACE_ZERO_CHAR @@ -109,6 +125,19 @@ static int ftrace_raw_init_event_##call(void) \ return 0; \ } \ +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) \ + \ +struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .id = proto, \ + .system = __stringify(TRACE_SYSTEM), \ + .show_format = ftrace_format_##call, \ +}; + #include "trace_event_types.h" #undef TRACE_FIELD @@ -150,4 +179,8 @@ ftrace_define_fields_##call(void) \ return ret; \ } +#undef TRACE_EVENT_FORMAT_NOFILTER +#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ + tpfmt) + #include "trace_event_types.h" -- cgit v0.10.2 From fa1b47dd85453ec7d4bcfe4aa4a2d172ba452fc3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 2 Apr 2009 00:09:41 -0400 Subject: ring-buffer: add ring_buffer_discard_commit The ring_buffer_discard_commit is similar to ring_buffer_event_discard but it can only be done on an event that has yet to be commited. Unpredictable results can happen otherwise. The main difference between ring_buffer_discard_commit and ring_buffer_event_discard is that ring_buffer_discard_commit will try to free the data in the ring buffer if nothing has addded data after the reserved event. If something did, then it acts almost the same as ring_buffer_event_discard followed by a ring_buffer_unlock_commit. Note, either ring_buffer_commit_discard and ring_buffer_unlock_commit can be called on an event, not both. This commit also exports both discard functions to be usable by GPL modules. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e1b7b21..f0aa486 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -68,9 +68,38 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event) return event->time_delta; } +/* + * ring_buffer_event_discard can discard any event in the ring buffer. + * it is up to the caller to protect against a reader from + * consuming it or a writer from wrapping and replacing it. + * + * No external protection is needed if this is called before + * the event is commited. But in that case it would be better to + * use ring_buffer_discard_commit. + * + * Note, if an event that has not been committed is discarded + * with ring_buffer_event_discard, it must still be committed. + */ void ring_buffer_event_discard(struct ring_buffer_event *event); /* + * ring_buffer_discard_commit will remove an event that has not + * ben committed yet. If this is used, then ring_buffer_unlock_commit + * must not be called on the discarded event. This function + * will try to remove the event from the ring buffer completely + * if another event has not been written after it. + * + * Example use: + * + * if (some_condition) + * ring_buffer_discard_commit(buffer, event); + * else + * ring_buffer_unlock_commit(buffer, event); + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event); + +/* * size is in bytes for each per CPU buffer. */ struct ring_buffer * diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 74a1180..f935bd5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -205,27 +205,6 @@ static void rb_event_set_padding(struct ring_buffer_event *event) event->time_delta = 0; } -/** - * ring_buffer_event_discard - discard an event in the ring buffer - * @buffer: the ring buffer - * @event: the event to discard - * - * Sometimes a event that is in the ring buffer needs to be ignored. - * This function lets the user discard an event in the ring buffer - * and then that event will not be read later. - * - * Note, it is up to the user to be careful with this, and protect - * against races. If the user discards an event that has been consumed - * it is possible that it could corrupt the ring buffer. - */ -void ring_buffer_event_discard(struct ring_buffer_event *event) -{ - event->type = RINGBUF_TYPE_PADDING; - /* time delta must be non zero */ - if (!event->time_delta) - event->time_delta = 1; -} - static unsigned rb_event_data_length(struct ring_buffer_event *event) { @@ -1571,6 +1550,110 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); /** + * ring_buffer_event_discard - discard any event in the ring buffer + * @event: the event to discard + * + * Sometimes a event that is in the ring buffer needs to be ignored. + * This function lets the user discard an event in the ring buffer + * and then that event will not be read later. + * + * Note, it is up to the user to be careful with this, and protect + * against races. If the user discards an event that has been consumed + * it is possible that it could corrupt the ring buffer. + */ +void ring_buffer_event_discard(struct ring_buffer_event *event) +{ + event->type = RINGBUF_TYPE_PADDING; + /* time delta must be non zero */ + if (!event->time_delta) + event->time_delta = 1; +} +EXPORT_SYMBOL_GPL(ring_buffer_event_discard); + +/** + * ring_buffer_commit_discard - discard an event that has not been committed + * @buffer: the ring buffer + * @event: non committed event to discard + * + * This is similar to ring_buffer_event_discard but must only be + * performed on an event that has not been committed yet. The difference + * is that this will also try to free the event from the ring buffer + * if another event has not been added behind it. + * + * If another event has been added behind it, it will set the event + * up as discarded, and perform the commit. + * + * If this function is called, do not call ring_buffer_unlock_commit on + * the event. + */ +void ring_buffer_discard_commit(struct ring_buffer *buffer, + struct ring_buffer_event *event) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long new_index, old_index; + struct buffer_page *bpage; + unsigned long index; + unsigned long addr; + int cpu; + + /* The event is discarded regardless */ + ring_buffer_event_discard(event); + + /* + * This must only be called if the event has not been + * committed yet. Thus we can assume that preemption + * is still disabled. + */ + RB_WARN_ON(buffer, !preempt_count()); + + cpu = smp_processor_id(); + cpu_buffer = buffer->buffers[cpu]; + + new_index = rb_event_index(event); + old_index = new_index + rb_event_length(event); + addr = (unsigned long)event; + addr &= PAGE_MASK; + + bpage = cpu_buffer->tail_page; + + if (bpage == (void *)addr && rb_page_write(bpage) == old_index) { + /* + * This is on the tail page. It is possible that + * a write could come in and move the tail page + * and write to the next page. That is fine + * because we just shorten what is on this page. + */ + index = local_cmpxchg(&bpage->write, old_index, new_index); + if (index == old_index) + goto out; + } + + /* + * The commit is still visible by the reader, so we + * must increment entries. + */ + cpu_buffer->entries++; + out: + /* + * If a write came in and pushed the tail page + * we still need to update the commit pointer + * if we were the commit. + */ + if (rb_is_commit(cpu_buffer, event)) + rb_set_commit_to_write(cpu_buffer); + + /* + * Only the last preempt count needs to restore preemption. + */ + if (preempt_count() == 1) + ftrace_preempt_enable(per_cpu(rb_need_resched, cpu)); + else + preempt_enable_no_resched_notrace(); + +} +EXPORT_SYMBOL_GPL(ring_buffer_discard_commit); + +/** * ring_buffer_write - write data to the buffer without reserving * @buffer: The ring buffer to write to. * @length: The length of the data being written (excluding the event header) -- cgit v0.10.2 From 77d9f465d46fd67cdb82ee5e1ab99dd57a17c486 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 2 Apr 2009 01:16:59 -0400 Subject: tracing/filters: use ring_buffer_discard_commit for discarded events The ring_buffer_discard_commit makes better usage of the ring_buffer when an event has been discarded. It tries to remove it completely if possible. This patch converts the trace event filtering to use ring_buffer_discard_commit instead of the ring_buffer_event_discard. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c209d21..d880ab2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -884,13 +884,18 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); + __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); } void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { - return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); + __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); +} + +void trace_current_buffer_discard_commit(struct ring_buffer_event *event) +{ + ring_buffer_discard_commit(global_trace.buffer, event); } void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3cf856f..dfefffd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -497,6 +497,7 @@ void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); +void trace_current_buffer_discard_commit(struct ring_buffer_event *event); struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 9d2fa78..d2f34bf 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -223,9 +223,9 @@ static void ftrace_raw_event_##call(proto) \ assign; \ \ if (call->preds && !filter_match_preds(call, entry)) \ - ring_buffer_event_discard(event); \ - \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ + trace_current_buffer_discard_commit(event); \ + else \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ \ } \ \ -- cgit v0.10.2 From 5f77a88b3f8268b11940b51d2e03d26a663ceb90 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 8 Apr 2009 03:14:01 -0500 Subject: tracing/infrastructure: separate event tracer from event support Add a new config option, CONFIG_EVENT_TRACING that gets selected when CONFIG_TRACING is selected and adds everything needed by the stuff in trace_export - basically all the event tracing support needed by e.g. bprint, minus the actual events, which are only included if CONFIG_EVENT_TRACER is selected. So CONFIG_EVENT_TRACER can be used to turn on or off the generated events (what I think of as the 'event tracer'), while CONFIG_EVENT_TRACING turns on or off the base event tracing support used by both the event tracer and the other things such as bprint that can't be configured out. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: fweisbec@gmail.com LKML-Reference: <1239178441.10295.34.camel@tropicana> Signed-off-by: Ingo Molnar diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 7fa660f..7e9b1e9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -61,7 +61,7 @@ #define BRANCH_PROFILE() #endif -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ *(_ftrace_events) \ VMLINUX_SYMBOL(__stop_ftrace_events) = .; diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 23b96eb..644606e 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -48,6 +48,9 @@ config FTRACE_NMI_ENTER depends on HAVE_FTRACE_NMI_ENTER default y +config EVENT_TRACING + bool + config TRACING bool select DEBUG_FS @@ -56,6 +59,7 @@ config TRACING select TRACEPOINTS select NOP_TRACER select BINARY_PRINTF + select EVENT_TRACING # # Minimum requirements an architecture has to meet for us to diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 2630f51..3ad367e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -40,11 +40,11 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o -obj-$(CONFIG_EVENT_TRACER) += trace_events.o +obj-$(CONFIG_EVENT_TRACING) += trace_events.o obj-$(CONFIG_EVENT_TRACER) += events.o -obj-$(CONFIG_EVENT_TRACER) += trace_export.o +obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o -obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o +obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o libftrace-y := ftrace.o -- cgit v0.10.2 From eb02ce017dd83985041a7e54c6449f92d53b026f Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 8 Apr 2009 03:15:54 -0500 Subject: tracing/filters: use ring_buffer_discard_commit() in filter_check_discard() This patch changes filter_check_discard() to make use of the new ring_buffer_discard_commit() function and modifies the current users to call the old commit function in the non-discard case. It also introduces a version of filter_check_discard() that uses the global trace buffer (filter_current_check_discard()) for those cases. v2 changes: - fix compile error noticed by Ingo Molnar Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: fweisbec@gmail.com LKML-Reference: <1239178554.10295.36.camel@tropicana> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index 9419ad1..86cdf67 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -63,9 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } @@ -90,9 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id, entry->call_site = call_site; entry->ptr = ptr; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); trace_wake_up(); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d880ab2..c0047fc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -171,6 +171,12 @@ static struct trace_array global_trace; static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); +int filter_current_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer_event *event) +{ + return filter_check_discard(call, rec, global_trace.buffer, event); +} + cycle_t ftrace_now(int cpu) { u64 ts; @@ -919,9 +925,8 @@ trace_function(struct trace_array *tr, entry->ip = ip; entry->parent_ip = parent_ip; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -943,8 +948,8 @@ static int __trace_graph_entry(struct trace_array *tr, return 0; entry = ring_buffer_event_data(event); entry->graph_ent = *trace; - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(global_trace.buffer, event); + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(global_trace.buffer, event); return 1; } @@ -967,8 +972,8 @@ static void __trace_graph_return(struct trace_array *tr, return; entry = ring_buffer_event_data(event); entry->ret = *trace; - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(global_trace.buffer, event); + if (!filter_current_check_discard(call, entry, event)) + ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -1004,8 +1009,8 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1052,8 +1057,8 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); #endif } @@ -1114,9 +1119,8 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_state = next->state; entry->next_cpu = task_cpu(next); - filter_check_discard(call, entry, event); - - trace_buffer_unlock_commit(tr, event, flags, pc); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -1142,9 +1146,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); ftrace_trace_stack(tr, flags, 6, pc); ftrace_trace_userstack(tr, flags, pc); } @@ -1285,8 +1288,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); @@ -1341,8 +1344,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; - filter_check_discard(call, entry, event); - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: __raw_spin_unlock(&trace_buf_lock); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dfefffd..9729d14 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -866,13 +866,21 @@ extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern void filter_free_subsystem_preds(struct event_subsystem *system); extern int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred); +extern int filter_current_check_discard(struct ftrace_event_call *call, + void *rec, + struct ring_buffer_event *event); -static inline void +static inline int filter_check_discard(struct ftrace_event_call *call, void *rec, + struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->preds) && !filter_match_preds(call, rec)) - ring_buffer_event_discard(event); + if (unlikely(call->preds) && !filter_match_preds(call, rec)) { + ring_buffer_discard_commit(buffer, event); + return 1; + } + + return 0; } #define __common_field(type, item) \ diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index c95c25d..8e64e60 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -74,9 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - filter_check_discard(call, entry, event); - - ring_buffer_unlock_commit(tr->buffer, event); + if (!filter_check_discard(call, entry, tr->buffer, event)) + ring_buffer_unlock_commit(tr->buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index d2f34bf..b2b2982 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -222,11 +222,8 @@ static void ftrace_raw_event_##call(proto) \ \ assign; \ \ - if (call->preds && !filter_match_preds(call, entry)) \ - trace_current_buffer_discard_commit(event); \ - else \ + if (!filter_current_check_discard(call, entry, event)) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ - \ } \ \ static int ftrace_raw_reg_event_##call(void) \ diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index e6b275b..8683d50 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -195,8 +195,8 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; - filter_check_discard(call, entry, event); - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index 8ce7d7d..810a5b7 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -55,8 +55,8 @@ static void probe_power_end(struct power_trace *it) goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - filter_check_discard(call, entry, event); - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } @@ -87,8 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type, goto out; entry = ring_buffer_event_data(event); entry->state_data = *it; - filter_check_discard(call, entry, event); - trace_buffer_unlock_commit(tr, event, 0, 0); + if (!filter_check_discard(call, entry, tr->buffer, event)) + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } -- cgit v0.10.2 From 0a19e53c1514ad8e9c3cbab40c6c3f52c86f403d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 13 Apr 2009 03:17:50 -0500 Subject: tracing/filters: allow on-the-fly filter switching This patch allows event filters to be safely removed or switched on-the-fly while avoiding the use of rcu or the suspension of tracing of previous versions. It does it by adding a new filter_pred_none() predicate function which does nothing and by never deallocating either the predicates or any of the filter_pred members used in matching; the predicate lists are allocated and initialized during ftrace_event_calls initialization. Whenever a filter is removed or replaced, the filter_pred_* functions currently in use by the affected ftrace_event_call are immediately switched over to to the filter_pred_none() function, while the rest of the filter_pred members are left intact, allowing any currently executing filter_pred_* functions to finish up, using the values they're currently using. In the case of filter replacement, the new predicate values are copied into the old predicates after the above step, and the filter_pred_none() functions are replaced by the filter_pred_* functions for the new filter. In this case, it is possible though very unlikely that a previous filter_pred_* is still running even after the filter_pred_none() switch and the switch to the new filter_pred_*. In that case, however, because nothing has been deallocated in the filter_pred, the worst that can happen is that the old filter_pred_* function sees the new values and as a result produces either a false positive or a false negative, depending on the values it finds. So one downside to this method is that rarely, it can produce a bad match during the filter switch, but it should be possible to live with that, IMHO. The other downside is that at least in this patch the predicate lists are always pre-allocated, taking up memory from the start. They could probably be allocated on first-use, and de-allocated when tracing is completely stopped - if this patch makes sense, I could create another one to do that later on. Oh, and it also places a restriction on the size of __arrays in events, currently set to 128, since they can't be larger than the now embedded str_val arrays in the filter_pred struct. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: paulmck@linux.vnet.ibm.com LKML-Reference: <1239610670.6660.49.camel@tropicana> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9729d14..b05b6ac 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -813,6 +813,7 @@ struct ftrace_event_call { int (*show_format)(struct trace_seq *s); int (*define_fields)(void); struct list_head fields; + int n_preds; struct filter_pred **preds; #ifdef CONFIG_EVENT_PROFILE @@ -826,6 +827,7 @@ struct event_subsystem { struct list_head list; const char *name; struct dentry *entry; + int n_preds; struct filter_pred **preds; }; @@ -834,7 +836,8 @@ struct event_subsystem { (unsigned long)event < (unsigned long)__stop_ftrace_events; \ event++) -#define MAX_FILTER_PRED 8 +#define MAX_FILTER_PRED 8 +#define MAX_FILTER_STR_VAL 128 struct filter_pred; @@ -843,7 +846,7 @@ typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); struct filter_pred { filter_pred_fn_t fn; u64 val; - char *str_val; + char str_val[MAX_FILTER_STR_VAL]; int str_len; char *field_name; int offset; @@ -855,13 +858,14 @@ struct filter_pred { int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size); +extern int init_preds(struct ftrace_event_call *call); extern void filter_free_pred(struct filter_pred *pred); -extern void filter_print_preds(struct filter_pred **preds, +extern void filter_print_preds(struct filter_pred **preds, int n_preds, struct trace_seq *s); extern int filter_parse(char **pbuf, struct filter_pred *pred); extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); -extern void filter_free_preds(struct ftrace_event_call *call); +extern void filter_disable_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern void filter_free_subsystem_preds(struct event_subsystem *system); extern int filter_add_subsystem_pred(struct event_subsystem *system, @@ -875,7 +879,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->preds) && !filter_match_preds(call, rec)) { + if (unlikely(call->n_preds) && !filter_match_preds(call, rec)) { ring_buffer_discard_commit(buffer, event); return 1; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 789e14e..ead68ac 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -481,7 +481,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(call->preds, s); + filter_print_preds(call->preds, call->n_preds, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -516,7 +516,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, } if (pred->clear) { - filter_free_preds(call); + filter_disable_preds(call); filter_free_pred(pred); return cnt; } @@ -527,6 +527,8 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return err; } + filter_free_pred(pred); + *ppos += cnt; return cnt; @@ -549,7 +551,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(system->preds, s); + filter_print_preds(system->preds, system->n_preds, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -712,6 +714,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) list_add(&system->list, &event_subsystems); system->preds = NULL; + system->n_preds = 0; entry = debugfs_create_file("filter", 0644, system->entry, system, &ftrace_subsystem_filter_fops); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9f8ecca..de42dad 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -82,25 +82,27 @@ static int filter_pred_string(struct filter_pred *pred, void *event) return match; } +static int filter_pred_none(struct filter_pred *pred, void *event) +{ + return 0; +} + /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct ftrace_event_call *call, void *rec) { int i, matched, and_failed = 0; struct filter_pred *pred; - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (call->preds[i]) { - pred = call->preds[i]; - if (and_failed && !pred->or) - continue; - matched = pred->fn(pred, rec); - if (!matched && !pred->or) { - and_failed = 1; - continue; - } else if (matched && pred->or) - return 1; - } else - break; + for (i = 0; i < call->n_preds; i++) { + pred = call->preds[i]; + if (and_failed && !pred->or) + continue; + matched = pred->fn(pred, rec); + if (!matched && !pred->or) { + and_failed = 1; + continue; + } else if (matched && pred->or) + return 1; } if (and_failed) @@ -109,31 +111,29 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) return 1; } -void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) +void filter_print_preds(struct filter_pred **preds, int n_preds, + struct trace_seq *s) { char *field_name; struct filter_pred *pred; int i; - if (!preds) { + if (!n_preds) { trace_seq_printf(s, "none\n"); return; } - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (preds[i]) { - pred = preds[i]; - field_name = pred->field_name; - if (i) - trace_seq_printf(s, pred->or ? "|| " : "&& "); - trace_seq_printf(s, "%s ", field_name); - trace_seq_printf(s, pred->not ? "!= " : "== "); - if (pred->str_val) - trace_seq_printf(s, "%s\n", pred->str_val); - else - trace_seq_printf(s, "%llu\n", pred->val); - } else - break; + for (i = 0; i < n_preds; i++) { + pred = preds[i]; + field_name = pred->field_name; + if (i) + trace_seq_printf(s, pred->or ? "|| " : "&& "); + trace_seq_printf(s, "%s ", field_name); + trace_seq_printf(s, pred->not ? "!= " : "== "); + if (pred->str_len) + trace_seq_printf(s, "%s\n", pred->str_val); + else + trace_seq_printf(s, "%llu\n", pred->val); } } @@ -156,20 +156,69 @@ void filter_free_pred(struct filter_pred *pred) return; kfree(pred->field_name); - kfree(pred->str_val); kfree(pred); } -void filter_free_preds(struct ftrace_event_call *call) +static void filter_clear_pred(struct filter_pred *pred) +{ + kfree(pred->field_name); + pred->field_name = NULL; + pred->str_len = 0; +} + +static int filter_set_pred(struct filter_pred *dest, + struct filter_pred *src, + filter_pred_fn_t fn) +{ + *dest = *src; + dest->field_name = kstrdup(src->field_name, GFP_KERNEL); + if (!dest->field_name) + return -ENOMEM; + dest->fn = fn; + + return 0; +} + +void filter_disable_preds(struct ftrace_event_call *call) { int i; - if (call->preds) { - for (i = 0; i < MAX_FILTER_PRED; i++) + call->n_preds = 0; + + for (i = 0; i < MAX_FILTER_PRED; i++) + call->preds[i]->fn = filter_pred_none; +} + +int init_preds(struct ftrace_event_call *call) +{ + struct filter_pred *pred; + int i; + + call->n_preds = 0; + + call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); + if (!call->preds) + return -ENOMEM; + + for (i = 0; i < MAX_FILTER_PRED; i++) { + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + goto oom; + pred->fn = filter_pred_none; + call->preds[i] = pred; + } + + return 0; + +oom: + for (i = 0; i < MAX_FILTER_PRED; i++) { + if (call->preds[i]) filter_free_pred(call->preds[i]); - kfree(call->preds); - call->preds = NULL; } + kfree(call->preds); + call->preds = NULL; + + return -ENOMEM; } void filter_free_subsystem_preds(struct event_subsystem *system) @@ -177,11 +226,12 @@ void filter_free_subsystem_preds(struct event_subsystem *system) struct ftrace_event_call *call = __start_ftrace_events; int i; - if (system->preds) { - for (i = 0; i < MAX_FILTER_PRED; i++) + if (system->n_preds) { + for (i = 0; i < system->n_preds; i++) filter_free_pred(system->preds[i]); kfree(system->preds); system->preds = NULL; + system->n_preds = 0; } events_for_each(call) { @@ -189,33 +239,31 @@ void filter_free_subsystem_preds(struct event_subsystem *system) continue; if (!strcmp(call->system, system->name)) - filter_free_preds(call); + filter_disable_preds(call); } } static int __filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred) + struct filter_pred *pred, + filter_pred_fn_t fn) { - int i; + int idx, err; - if (call->preds && !pred->compound) - filter_free_preds(call); + if (call->n_preds && !pred->compound) + filter_disable_preds(call); - if (!call->preds) { - call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), - GFP_KERNEL); - if (!call->preds) - return -ENOMEM; - } + if (call->n_preds == MAX_FILTER_PRED) + return -ENOSPC; - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (!call->preds[i]) { - call->preds[i] = pred; - return 0; - } - } + idx = call->n_preds; + filter_clear_pred(call->preds[idx]); + err = filter_set_pred(call->preds[idx], pred, fn); + if (err) + return err; + + call->n_preds++; - return -ENOSPC; + return 0; } static int is_string_field(const char *type) @@ -229,98 +277,66 @@ static int is_string_field(const char *type) int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) { struct ftrace_event_field *field; + filter_pred_fn_t fn; field = find_event_field(call, pred->field_name); if (!field) return -EINVAL; + pred->fn = filter_pred_none; pred->offset = field->offset; if (is_string_field(field->type)) { - if (!pred->str_val) + if (!pred->str_len) return -EINVAL; - pred->fn = filter_pred_string; + fn = filter_pred_string; pred->str_len = field->size; - return __filter_add_pred(call, pred); + return __filter_add_pred(call, pred, fn); } else { - if (pred->str_val) + if (pred->str_len) return -EINVAL; } switch (field->size) { case 8: - pred->fn = filter_pred_64; + fn = filter_pred_64; break; case 4: - pred->fn = filter_pred_32; + fn = filter_pred_32; break; case 2: - pred->fn = filter_pred_16; + fn = filter_pred_16; break; case 1: - pred->fn = filter_pred_8; + fn = filter_pred_8; break; default: return -EINVAL; } - return __filter_add_pred(call, pred); -} - -static struct filter_pred *copy_pred(struct filter_pred *pred) -{ - struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); - if (!new_pred) - return NULL; - - memcpy(new_pred, pred, sizeof(*pred)); - - if (pred->field_name) { - new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); - if (!new_pred->field_name) { - kfree(new_pred); - return NULL; - } - } - - if (pred->str_val) { - new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); - if (!new_pred->str_val) { - filter_free_pred(new_pred); - return NULL; - } - } - - return new_pred; + return __filter_add_pred(call, pred, fn); } int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred) { struct ftrace_event_call *call = __start_ftrace_events; - struct filter_pred *event_pred; - int i; - if (system->preds && !pred->compound) + if (system->n_preds && !pred->compound) filter_free_subsystem_preds(system); - if (!system->preds) { + if (!system->n_preds) { system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); if (!system->preds) return -ENOMEM; } - for (i = 0; i < MAX_FILTER_PRED; i++) { - if (!system->preds[i]) { - system->preds[i] = pred; - break; - } - } - - if (i == MAX_FILTER_PRED) + if (system->n_preds == MAX_FILTER_PRED) return -ENOSPC; + system->preds[system->n_preds] = pred; + events_for_each(call) { int err; @@ -333,22 +349,16 @@ int filter_add_subsystem_pred(struct event_subsystem *system, if (!find_event_field(call, pred->field_name)) continue; - event_pred = copy_pred(pred); - if (!event_pred) - goto oom; - - err = filter_add_pred(call, event_pred); - if (err) - filter_free_pred(event_pred); - if (err == -ENOMEM) - goto oom; + err = filter_add_pred(call, pred); + if (err == -ENOMEM) { + system->preds[system->n_preds] = NULL; + return err; + } } - return 0; + system->n_preds++; -oom: - system->preds[i] = NULL; - return -ENOMEM; + return 0; } int filter_parse(char **pbuf, struct filter_pred *pred) @@ -410,7 +420,8 @@ int filter_parse(char **pbuf, struct filter_pred *pred) } } - if (!val_str) { + if (!val_str || !strlen(val_str) + || strlen(val_str) >= MAX_FILTER_STR_VAL) { pred->field_name = NULL; return -EINVAL; } @@ -419,11 +430,12 @@ int filter_parse(char **pbuf, struct filter_pred *pred) if (!pred->field_name) return -ENOMEM; + pred->str_len = 0; pred->val = simple_strtoull(val_str, &tmp, 0); if (tmp == val_str) { - pred->str_val = kstrdup(val_str, GFP_KERNEL); - if (!pred->str_val) - return -ENOMEM; + strncpy(pred->str_val, val_str, MAX_FILTER_STR_VAL); + pred->str_len = strlen(val_str); + pred->str_val[pred->str_len] = '\0'; } else if (*tmp != '\0') return -EINVAL; diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 02fb710..59cfd7d 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -140,6 +140,7 @@ ftrace_format_##call(struct trace_seq *s) \ #undef __array #define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ sizeof(field.item)); \ diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index b2b2982..5bb1b7f 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -255,6 +255,7 @@ static int ftrace_raw_init_event_##call(void) \ return -ENODEV; \ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ return 0; \ } \ \ diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 77c494f..48fc02f 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -122,6 +122,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static int ftrace_raw_init_event_##call(void) \ { \ INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ return 0; \ } \ -- cgit v0.10.2 From 6e837fb152410e571a81aaadbd9884f0bc46a55e Mon Sep 17 00:00:00 2001 From: Etienne Basset Date: Wed, 8 Apr 2009 20:39:40 +0200 Subject: smack: implement logging V3 This patch creates auditing functions usable by LSM to audit security events. It provides standard dumping of FS, NET, task etc ... events (code borrowed from SELinux) and provides 2 callbacks to define LSM specific auditing, which should be flexible enough to convert SELinux too. Signed-off-by: Etienne Basset Acked-by: Casey Schaufler cked-by: Eric Paris Signed-off-by: James Morris diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h new file mode 100644 index 0000000..e461b2c --- /dev/null +++ b/include/linux/lsm_audit.h @@ -0,0 +1,111 @@ +/* + * Common LSM logging functions + * Heavily borrowed from selinux/avc.h + * + * Author : Etienne BASSET + * + * All credits to : Stephen Smalley, + * All BUGS to : Etienne BASSET + */ +#ifndef _LSM_COMMON_LOGGING_ +#define _LSM_COMMON_LOGGING_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* Auxiliary data to use in generating the audit record. */ +struct common_audit_data { + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 + struct task_struct *tsk; + union { + struct { + struct path path; + struct inode *inode; + } fs; + struct { + int netif; + struct sock *sk; + u16 family; + __be16 dport; + __be16 sport; + union { + struct { + __be32 daddr; + __be32 saddr; + } v4; + struct { + struct in6_addr daddr; + struct in6_addr saddr; + } v6; + } fam; + } net; + int cap; + int ipc_id; + struct task_struct *tsk; +#ifdef CONFIG_KEYS + struct { + key_serial_t key; + char *key_desc; + } key_struct; +#endif + } u; + const char *function; + /* this union contains LSM specific data */ + union { + /* SMACK data */ + struct smack_audit_data { + char *subject; + char *object; + char *request; + int result; + } smack_audit_data; + /* SELinux data */ + struct { + u32 ssid; + u32 tsid; + u16 tclass; + u32 requested; + u32 audited; + struct av_decision *avd; + int result; + } selinux_audit_data; + } lsm_priv; + /* these callback will be implemented by a specific LSM */ + void (*lsm_pre_audit)(struct audit_buffer *, void *); + void (*lsm_post_audit)(struct audit_buffer *, void *); +}; + +#define v4info fam.v4 +#define v6info fam.v6 + +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +/* Initialize an LSM audit data structure. */ +#define COMMON_AUDIT_DATA_INIT(_d, _t) \ + { memset((_d), 0, sizeof(struct common_audit_data)); \ + (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } + +void common_lsm_audit(struct common_audit_data *a); + +#endif diff --git a/security/lsm_audit.c b/security/lsm_audit.c new file mode 100644 index 0000000..94b8684 --- /dev/null +++ b/security/lsm_audit.c @@ -0,0 +1,386 @@ +/* + * common LSM auditing functions + * + * Based on code written for SELinux by : + * Stephen Smalley, + * James Morris + * Author : Etienne Basset, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * ipv4_skb_to_auditdata : fill auditdata from skb + * @skb : the skb + * @ad : the audit data to fill + * @proto : the layer 4 protocol + * + * return 0 on success + */ +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto) +{ + int ret = 0; + struct iphdr *ih; + + ih = ip_hdr(skb); + if (ih == NULL) + return -EINVAL; + + ad->u.net.v4info.saddr = ih->saddr; + ad->u.net.v4info.daddr = ih->daddr; + + if (proto) + *proto = ih->protocol; + /* non initial fragment */ + if (ntohs(ih->frag_off) & IP_OFFSET) + return 0; + + switch (ih->protocol) { + case IPPROTO_TCP: { + struct tcphdr *th = tcp_hdr(skb); + if (th == NULL) + break; + + ad->u.net.sport = th->source; + ad->u.net.dport = th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr *uh = udp_hdr(skb); + if (uh == NULL) + break; + + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; + break; + } + case IPPROTO_DCCP: { + struct dccp_hdr *dh = dccp_hdr(skb); + if (dh == NULL) + break; + + ad->u.net.sport = dh->dccph_sport; + ad->u.net.dport = dh->dccph_dport; + break; + } + case IPPROTO_SCTP: { + struct sctphdr *sh = sctp_hdr(skb); + if (sh == NULL) + break; + ad->u.net.sport = sh->source; + ad->u.net.dport = sh->dest; + break; + } + default: + ret = -EINVAL; + } + return ret; +} +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * ipv6_skb_to_auditdata : fill auditdata from skb + * @skb : the skb + * @ad : the audit data to fill + * @proto : the layer 4 protocol + * + * return 0 on success + */ +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto) +{ + int offset, ret = 0; + struct ipv6hdr *ip6; + u8 nexthdr; + + ip6 = ipv6_hdr(skb); + if (ip6 == NULL) + return -EINVAL; + ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr); + ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr); + ret = 0; + /* IPv6 can have several extension header before the Transport header + * skip them */ + offset = skb_network_offset(skb); + offset += sizeof(*ip6); + nexthdr = ip6->nexthdr; + offset = ipv6_skip_exthdr(skb, offset, &nexthdr); + if (offset < 0) + return 0; + if (proto) + *proto = nexthdr; + switch (nexthdr) { + case IPPROTO_TCP: { + struct tcphdr _tcph, *th; + + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) + break; + + ad->u.net.sport = th->source; + ad->u.net.dport = th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr _udph, *uh; + + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) + break; + + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; + break; + } + case IPPROTO_DCCP: { + struct dccp_hdr _dccph, *dh; + + dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); + if (dh == NULL) + break; + + ad->u.net.sport = dh->dccph_sport; + ad->u.net.dport = dh->dccph_dport; + break; + } + case IPPROTO_SCTP: { + struct sctphdr _sctph, *sh; + + sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph); + if (sh == NULL) + break; + ad->u.net.sport = sh->source; + ad->u.net.dport = sh->dest; + break; + } + default: + ret = -EINVAL; + } + return ret; +} +#endif + + +static inline void print_ipv6_addr(struct audit_buffer *ab, + struct in6_addr *addr, __be16 port, + char *name1, char *name2) +{ + if (!ipv6_addr_any(addr)) + audit_log_format(ab, " %s=%pI6", name1, addr); + if (port) + audit_log_format(ab, " %s=%d", name2, ntohs(port)); +} + +static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr, + __be16 port, char *name1, char *name2) +{ + if (addr) + audit_log_format(ab, " %s=%pI4", name1, &addr); + if (port) + audit_log_format(ab, " %s=%d", name2, ntohs(port)); +} + +/** + * dump_common_audit_data - helper to dump common audit data + * @a : common audit data + * + */ +static void dump_common_audit_data(struct audit_buffer *ab, + struct common_audit_data *a) +{ + struct inode *inode = NULL; + struct task_struct *tsk = current; + + if (a->tsk) + tsk = a->tsk; + if (tsk && tsk->pid) { + audit_log_format(ab, " pid=%d comm=", tsk->pid); + audit_log_untrustedstring(ab, tsk->comm); + } + + switch (a->type) { + case LSM_AUDIT_DATA_IPC: + audit_log_format(ab, " key=%d ", a->u.ipc_id); + break; + case LSM_AUDIT_DATA_CAP: + audit_log_format(ab, " capability=%d ", a->u.cap); + break; + case LSM_AUDIT_DATA_FS: + if (a->u.fs.path.dentry) { + struct dentry *dentry = a->u.fs.path.dentry; + if (a->u.fs.path.mnt) { + audit_log_d_path(ab, "path=", &a->u.fs.path); + } else { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); + } + inode = dentry->d_inode; + } else if (a->u.fs.inode) { + struct dentry *dentry; + inode = a->u.fs.inode; + dentry = d_find_alias(inode); + if (dentry) { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); + dput(dentry); + } + } + if (inode) + audit_log_format(ab, " dev=%s ino=%lu", + inode->i_sb->s_id, + inode->i_ino); + break; + case LSM_AUDIT_DATA_TASK: + tsk = a->u.tsk; + if (tsk && tsk->pid) { + audit_log_format(ab, " pid=%d comm=", tsk->pid); + audit_log_untrustedstring(ab, tsk->comm); + } + break; + case LSM_AUDIT_DATA_NET: + if (a->u.net.sk) { + struct sock *sk = a->u.net.sk; + struct unix_sock *u; + int len = 0; + char *p = NULL; + + switch (sk->sk_family) { + case AF_INET: { + struct inet_sock *inet = inet_sk(sk); + + print_ipv4_addr(ab, inet->rcv_saddr, + inet->sport, + "laddr", "lport"); + print_ipv4_addr(ab, inet->daddr, + inet->dport, + "faddr", "fport"); + break; + } + case AF_INET6: { + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *inet6 = inet6_sk(sk); + + print_ipv6_addr(ab, &inet6->rcv_saddr, + inet->sport, + "laddr", "lport"); + print_ipv6_addr(ab, &inet6->daddr, + inet->dport, + "faddr", "fport"); + break; + } + case AF_UNIX: + u = unix_sk(sk); + if (u->dentry) { + struct path path = { + .dentry = u->dentry, + .mnt = u->mnt + }; + audit_log_d_path(ab, "path=", &path); + break; + } + if (!u->addr) + break; + len = u->addr->len-sizeof(short); + p = &u->addr->name->sun_path[0]; + audit_log_format(ab, " path="); + if (*p) + audit_log_untrustedstring(ab, p); + else + audit_log_n_hex(ab, p, len); + break; + } + } + + switch (a->u.net.family) { + case AF_INET: + print_ipv4_addr(ab, a->u.net.v4info.saddr, + a->u.net.sport, + "saddr", "src"); + print_ipv4_addr(ab, a->u.net.v4info.daddr, + a->u.net.dport, + "daddr", "dest"); + break; + case AF_INET6: + print_ipv6_addr(ab, &a->u.net.v6info.saddr, + a->u.net.sport, + "saddr", "src"); + print_ipv6_addr(ab, &a->u.net.v6info.daddr, + a->u.net.dport, + "daddr", "dest"); + break; + } + if (a->u.net.netif > 0) { + struct net_device *dev; + + /* NOTE: we always use init's namespace */ + dev = dev_get_by_index(&init_net, a->u.net.netif); + if (dev) { + audit_log_format(ab, " netif=%s", dev->name); + dev_put(dev); + } + } + break; +#ifdef CONFIG_KEYS + case LSM_AUDIT_DATA_KEY: + audit_log_format(ab, " key_serial=%u", a->u.key_struct.key); + if (a->u.key_struct.key_desc) { + audit_log_format(ab, " key_desc="); + audit_log_untrustedstring(ab, a->u.key_struct.key_desc); + } + break; +#endif + } /* switch (a->type) */ +} + +/** + * common_lsm_audit - generic LSM auditing function + * @a: auxiliary audit data + * + * setup the audit buffer for common security information + * uses callback to print LSM specific information + */ +void common_lsm_audit(struct common_audit_data *a) +{ + struct audit_buffer *ab; + + if (a == NULL) + return; + /* we use GFP_ATOMIC so we won't sleep */ + ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); + + if (ab == NULL) + return; + + if (a->lsm_pre_audit) + a->lsm_pre_audit(ab, a); + + dump_common_audit_data(ab, a); + + if (a->lsm_post_audit) + a->lsm_post_audit(ab, a); + + audit_log_end(ab); +} -- cgit v0.10.2 From ecfcc53fef3c357574bb6143dce6631e6d56295c Mon Sep 17 00:00:00 2001 From: Etienne Basset Date: Wed, 8 Apr 2009 20:40:06 +0200 Subject: smack: implement logging V3 the following patch, add logging of Smack security decisions. This is of course very useful to understand what your current smack policy does. As suggested by Casey, it also now forbids labels with ', " or \ It introduces a '/smack/logging' switch : 0: no logging 1: log denied (default) 2: log accepted 3: log denied&accepted Signed-off-by: Etienne Basset Acked-by: Casey Schaufler Acked-by: Eric Paris Signed-off-by: James Morris diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt index 629c92e..34614b4 100644 --- a/Documentation/Smack.txt +++ b/Documentation/Smack.txt @@ -184,8 +184,9 @@ length. Single character labels using special characters, that being anything other than a letter or digit, are reserved for use by the Smack development team. Smack labels are unstructured, case sensitive, and the only operation ever performed on them is comparison for equality. Smack labels cannot -contain unprintable characters or the "/" (slash) character. Smack labels -cannot begin with a '-', which is reserved for special options. +contain unprintable characters, the "/" (slash), the "\" (backslash), the "'" +(quote) and '"' (double-quote) characters. +Smack labels cannot begin with a '-', which is reserved for special options. There are some predefined labels: @@ -523,3 +524,18 @@ Smack supports some mount options: These mount options apply to all file system types. +Smack auditing + +If you want Smack auditing of security events, you need to set CONFIG_AUDIT +in your kernel configuration. +By default, all denied events will be audited. You can change this behavior by +writing a single character to the /smack/logging file : +0 : no logging +1 : log denied (default) +2 : log accepted +3 : log denied & accepted + +Events are logged as 'key=value' pairs, for each event you at least will get +the subjet, the object, the rights requested, the action, the kernel function +that triggered the event, plus other pairs depending on the type of event +audited. diff --git a/security/Makefile b/security/Makefile index fa77021..c67557c 100644 --- a/security/Makefile +++ b/security/Makefile @@ -16,6 +16,9 @@ obj-$(CONFIG_SECURITYFS) += inode.o # Must precede capability.o in order to stack properly. obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o +ifeq ($(CONFIG_AUDIT),y) +obj-$(CONFIG_SECURITY_SMACK) += lsm_audit.o +endif obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o diff --git a/security/smack/smack.h b/security/smack/smack.h index 42ef313..243bec1 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -20,6 +20,7 @@ #include #include #include +#include /* * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is @@ -179,6 +180,20 @@ struct smack_known { #define MAY_NOT 0 /* + * Number of access types used by Smack (rwxa) + */ +#define SMK_NUM_ACCESS_TYPE 4 + +/* + * Smack audit data; is empty if CONFIG_AUDIT not set + * to save some stack + */ +struct smk_audit_info { +#ifdef CONFIG_AUDIT + struct common_audit_data a; +#endif +}; +/* * These functions are in smack_lsm.c */ struct inode_smack *new_inode_smack(char *); @@ -186,8 +201,8 @@ struct inode_smack *new_inode_smack(char *); /* * These functions are in smack_access.c */ -int smk_access(char *, char *, int); -int smk_curacc(char *, u32); +int smk_access(char *, char *, int, struct smk_audit_info *); +int smk_curacc(char *, u32, struct smk_audit_info *); int smack_to_cipso(const char *, struct smack_cipso *); void smack_from_cipso(u32, char *, char *); char *smack_from_secid(const u32); @@ -237,4 +252,93 @@ static inline char *smk_of_inode(const struct inode *isp) return sip->smk_inode; } +/* + * logging functions + */ +#define SMACK_AUDIT_DENIED 0x1 +#define SMACK_AUDIT_ACCEPT 0x2 +extern int log_policy; + +void smack_log(char *subject_label, char *object_label, + int request, + int result, struct smk_audit_info *auditdata); + +#ifdef CONFIG_AUDIT + +/* + * some inline functions to set up audit data + * they do nothing if CONFIG_AUDIT is not set + * + */ +static inline void smk_ad_init(struct smk_audit_info *a, const char *func, + char type) +{ + memset(a, 0, sizeof(*a)); + a->a.type = type; + a->a.function = func; +} + +static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, + struct task_struct *t) +{ + a->a.u.tsk = t; +} +static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, + struct dentry *d) +{ + a->a.u.fs.path.dentry = d; +} +static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a, + struct vfsmount *m) +{ + a->a.u.fs.path.mnt = m; +} +static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, + struct inode *i) +{ + a->a.u.fs.inode = i; +} +static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a, + struct path p) +{ + a->a.u.fs.path = p; +} +static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a, + struct sock *sk) +{ + a->a.u.net.sk = sk; +} + +#else /* no AUDIT */ + +static inline void smk_ad_init(struct smk_audit_info *a, const char *func, + char type) +{ +} +static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, + struct task_struct *t) +{ +} +static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, + struct dentry *d) +{ +} +static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a, + struct vfsmount *m) +{ +} +static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, + struct inode *i) +{ +} +static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a, + struct path p) +{ +} +static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a, + struct sock *sk) +{ +} +#endif + #endif /* _SECURITY_SMACK_H */ diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index ac0a270..513dc1a 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -59,11 +59,18 @@ LIST_HEAD(smack_known_list); */ static u32 smack_next_secid = 10; +/* + * what events do we log + * can be overwritten at run-time by /smack/logging + */ +int log_policy = SMACK_AUDIT_DENIED; + /** * smk_access - determine if a subject has a specific access to an object * @subject_label: a pointer to the subject's Smack label * @object_label: a pointer to the object's Smack label * @request: the access requested, in "MAY" format + * @a : a pointer to the audit data * * This function looks up the subject/object pair in the * access rule list and returns 0 if the access is permitted, @@ -78,10 +85,12 @@ static u32 smack_next_secid = 10; * will be on the list, so checking the pointers may be a worthwhile * optimization. */ -int smk_access(char *subject_label, char *object_label, int request) +int smk_access(char *subject_label, char *object_label, int request, + struct smk_audit_info *a) { u32 may = MAY_NOT; struct smack_rule *srp; + int rc = 0; /* * Hardcoded comparisons. @@ -89,8 +98,10 @@ int smk_access(char *subject_label, char *object_label, int request) * A star subject can't access any object. */ if (subject_label == smack_known_star.smk_known || - strcmp(subject_label, smack_known_star.smk_known) == 0) - return -EACCES; + strcmp(subject_label, smack_known_star.smk_known) == 0) { + rc = -EACCES; + goto out_audit; + } /* * An internet object can be accessed by any subject. * Tasks cannot be assigned the internet label. @@ -100,20 +111,20 @@ int smk_access(char *subject_label, char *object_label, int request) subject_label == smack_known_web.smk_known || strcmp(object_label, smack_known_web.smk_known) == 0 || strcmp(subject_label, smack_known_web.smk_known) == 0) - return 0; + goto out_audit; /* * A star object can be accessed by any subject. */ if (object_label == smack_known_star.smk_known || strcmp(object_label, smack_known_star.smk_known) == 0) - return 0; + goto out_audit; /* * An object can be accessed in any way by a subject * with the same label. */ if (subject_label == object_label || strcmp(subject_label, object_label) == 0) - return 0; + goto out_audit; /* * A hat subject can read any object. * A floor object can be read by any subject. @@ -121,10 +132,10 @@ int smk_access(char *subject_label, char *object_label, int request) if ((request & MAY_ANYREAD) == request) { if (object_label == smack_known_floor.smk_known || strcmp(object_label, smack_known_floor.smk_known) == 0) - return 0; + goto out_audit; if (subject_label == smack_known_hat.smk_known || strcmp(subject_label, smack_known_hat.smk_known) == 0) - return 0; + goto out_audit; } /* * Beyond here an explicit relationship is required. @@ -148,28 +159,36 @@ int smk_access(char *subject_label, char *object_label, int request) * This is a bit map operation. */ if ((request & may) == request) - return 0; - - return -EACCES; + goto out_audit; + + rc = -EACCES; +out_audit: +#ifdef CONFIG_AUDIT + if (a) + smack_log(subject_label, object_label, request, rc, a); +#endif + return rc; } /** * smk_curacc - determine if current has a specific access to an object * @obj_label: a pointer to the object's Smack label * @mode: the access requested, in "MAY" format + * @a : common audit data * * This function checks the current subject label/object label pair * in the access rule list and returns 0 if the access is permitted, * non zero otherwise. It allows that current may have the capability * to override the rules. */ -int smk_curacc(char *obj_label, u32 mode) +int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a) { int rc; + char *sp = current_security(); - rc = smk_access(current_security(), obj_label, mode); + rc = smk_access(sp, obj_label, mode, NULL); if (rc == 0) - return 0; + goto out_audit; /* * Return if a specific label has been designated as the @@ -177,14 +196,105 @@ int smk_curacc(char *obj_label, u32 mode) * have that label. */ if (smack_onlycap != NULL && smack_onlycap != current->cred->security) - return rc; + goto out_audit; if (capable(CAP_MAC_OVERRIDE)) return 0; +out_audit: +#ifdef CONFIG_AUDIT + if (a) + smack_log(sp, obj_label, mode, rc, a); +#endif return rc; } +#ifdef CONFIG_AUDIT +/** + * smack_str_from_perm : helper to transalate an int to a + * readable string + * @string : the string to fill + * @access : the int + * + */ +static inline void smack_str_from_perm(char *string, int access) +{ + int i = 0; + if (access & MAY_READ) + string[i++] = 'r'; + if (access & MAY_WRITE) + string[i++] = 'w'; + if (access & MAY_EXEC) + string[i++] = 'x'; + if (access & MAY_APPEND) + string[i++] = 'a'; + string[i] = '\0'; +} +/** + * smack_log_callback - SMACK specific information + * will be called by generic audit code + * @ab : the audit_buffer + * @a : audit_data + * + */ +static void smack_log_callback(struct audit_buffer *ab, void *a) +{ + struct common_audit_data *ad = a; + struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data; + audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function, + sad->result ? "denied" : "granted"); + audit_log_format(ab, " subject="); + audit_log_untrustedstring(ab, sad->subject); + audit_log_format(ab, " object="); + audit_log_untrustedstring(ab, sad->object); + audit_log_format(ab, " requested=%s", sad->request); +} + +/** + * smack_log - Audit the granting or denial of permissions. + * @subject_label : smack label of the requester + * @object_label : smack label of the object being accessed + * @request: requested permissions + * @result: result from smk_access + * @a: auxiliary audit data + * + * Audit the granting or denial of permissions in accordance + * with the policy. + */ +void smack_log(char *subject_label, char *object_label, int request, + int result, struct smk_audit_info *ad) +{ + char request_buffer[SMK_NUM_ACCESS_TYPE + 1]; + struct smack_audit_data *sad; + struct common_audit_data *a = &ad->a; + + /* check if we have to log the current event */ + if (result != 0 && (log_policy & SMACK_AUDIT_DENIED) == 0) + return; + if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0) + return; + + if (a->function == NULL) + a->function = "unknown"; + + /* end preparing the audit data */ + sad = &a->lsm_priv.smack_audit_data; + smack_str_from_perm(request_buffer, request); + sad->subject = subject_label; + sad->object = object_label; + sad->request = request_buffer; + sad->result = result; + a->lsm_pre_audit = smack_log_callback; + + common_lsm_audit(a); +} +#else /* #ifdef CONFIG_AUDIT */ +void smack_log(char *subject_label, char *object_label, int request, + int result, struct smk_audit_info *ad) +{ +} +#endif + static DEFINE_MUTEX(smack_known_lock); /** @@ -209,7 +319,8 @@ struct smack_known *smk_import_entry(const char *string, int len) if (found) smack[i] = '\0'; else if (i >= len || string[i] > '~' || string[i] <= ' ' || - string[i] == '/') { + string[i] == '/' || string[i] == '"' || + string[i] == '\\' || string[i] == '\'') { smack[i] = '\0'; found = 1; } else diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 9215149..f557767 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -30,7 +30,6 @@ #include #include #include - #include "smack.h" #define task_security(task) (task_cred_xxx((task), security)) @@ -103,14 +102,24 @@ struct inode_smack *new_inode_smack(char *smack) static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) { int rc; + struct smk_audit_info ad; + char *sp, *tsp; rc = cap_ptrace_may_access(ctp, mode); if (rc != 0) return rc; - rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE); + sp = current_security(); + tsp = task_security(ctp); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, ctp); + + /* we won't log here, because rc can be overriden */ + rc = smk_access(sp, tsp, MAY_READWRITE, NULL); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) - return 0; + rc = 0; + + smack_log(sp, tsp, MAY_READWRITE, rc, &ad); return rc; } @@ -125,14 +134,24 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) static int smack_ptrace_traceme(struct task_struct *ptp) { int rc; + struct smk_audit_info ad; + char *sp, *tsp; rc = cap_ptrace_traceme(ptp); if (rc != 0) return rc; - rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, ptp); + + sp = current_security(); + tsp = task_security(ptp); + /* we won't log here, because rc can be overriden */ + rc = smk_access(tsp, sp, MAY_READWRITE, NULL); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) - return 0; + rc = 0; + + smack_log(tsp, sp, MAY_READWRITE, rc, &ad); return rc; } @@ -327,8 +346,14 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) static int smack_sb_statfs(struct dentry *dentry) { struct superblock_smack *sbp = dentry->d_sb->s_security; + int rc; + struct smk_audit_info ad; - return smk_curacc(sbp->smk_floor, MAY_READ); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + + rc = smk_curacc(sbp->smk_floor, MAY_READ, &ad); + return rc; } /** @@ -346,8 +371,12 @@ static int smack_sb_mount(char *dev_name, struct path *path, char *type, unsigned long flags, void *data) { struct superblock_smack *sbp = path->mnt->mnt_sb->s_security; + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path(&ad, *path); - return smk_curacc(sbp->smk_floor, MAY_WRITE); + return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad); } /** @@ -361,10 +390,14 @@ static int smack_sb_mount(char *dev_name, struct path *path, static int smack_sb_umount(struct vfsmount *mnt, int flags) { struct superblock_smack *sbp; + struct smk_audit_info ad; - sbp = mnt->mnt_sb->s_security; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_mountpoint); + smk_ad_setfield_u_fs_path_mnt(&ad, mnt); - return smk_curacc(sbp->smk_floor, MAY_WRITE); + sbp = mnt->mnt_sb->s_security; + return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad); } /* @@ -441,15 +474,20 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir, static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { - int rc; char *isp; + struct smk_audit_info ad; + int rc; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); - rc = smk_curacc(isp, MAY_WRITE); + rc = smk_curacc(isp, MAY_WRITE, &ad); if (rc == 0 && new_dentry->d_inode != NULL) { isp = smk_of_inode(new_dentry->d_inode); - rc = smk_curacc(isp, MAY_WRITE); + smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry); + rc = smk_curacc(isp, MAY_WRITE, &ad); } return rc; @@ -466,18 +504,24 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) { struct inode *ip = dentry->d_inode; + struct smk_audit_info ad; int rc; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + /* * You need write access to the thing you're unlinking */ - rc = smk_curacc(smk_of_inode(ip), MAY_WRITE); - if (rc == 0) + rc = smk_curacc(smk_of_inode(ip), MAY_WRITE, &ad); + if (rc == 0) { /* * You also need write access to the containing directory */ - rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); - + smk_ad_setfield_u_fs_path_dentry(&ad, NULL); + smk_ad_setfield_u_fs_inode(&ad, dir); + rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad); + } return rc; } @@ -491,17 +535,24 @@ static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) */ static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry) { + struct smk_audit_info ad; int rc; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + /* * You need write access to the thing you're removing */ - rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); - if (rc == 0) + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); + if (rc == 0) { /* * You also need write access to the containing directory */ - rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); + smk_ad_setfield_u_fs_path_dentry(&ad, NULL); + smk_ad_setfield_u_fs_inode(&ad, dir); + rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad); + } return rc; } @@ -525,15 +576,19 @@ static int smack_inode_rename(struct inode *old_inode, { int rc; char *isp; + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); - rc = smk_curacc(isp, MAY_READWRITE); + rc = smk_curacc(isp, MAY_READWRITE, &ad); if (rc == 0 && new_dentry->d_inode != NULL) { isp = smk_of_inode(new_dentry->d_inode); - rc = smk_curacc(isp, MAY_READWRITE); + smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry); + rc = smk_curacc(isp, MAY_READWRITE, &ad); } - return rc; } @@ -548,13 +603,15 @@ static int smack_inode_rename(struct inode *old_inode, */ static int smack_inode_permission(struct inode *inode, int mask) { + struct smk_audit_info ad; /* * No permission to check. Existence test. Yup, it's there. */ if (mask == 0) return 0; - - return smk_curacc(smk_of_inode(inode), mask); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_inode(&ad, inode); + return smk_curacc(smk_of_inode(inode), mask, &ad); } /** @@ -566,13 +623,16 @@ static int smack_inode_permission(struct inode *inode, int mask) */ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) { + struct smk_audit_info ad; /* * Need to allow for clearing the setuid bit. */ if (iattr->ia_valid & ATTR_FORCE) return 0; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); - return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); } /** @@ -584,7 +644,12 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) */ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) { - return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + smk_ad_setfield_u_fs_path_mnt(&ad, mnt); + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); } /** @@ -602,6 +667,7 @@ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) static int smack_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { + struct smk_audit_info ad; int rc = 0; if (strcmp(name, XATTR_NAME_SMACK) == 0 || @@ -615,8 +681,11 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name, } else rc = cap_inode_setxattr(dentry, name, value, size, flags); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + if (rc == 0) - rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); return rc; } @@ -671,7 +740,12 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name, */ static int smack_inode_getxattr(struct dentry *dentry, const char *name) { - return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); } /* @@ -685,6 +759,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name) */ static int smack_inode_removexattr(struct dentry *dentry, const char *name) { + struct smk_audit_info ad; int rc = 0; if (strcmp(name, XATTR_NAME_SMACK) == 0 || @@ -695,8 +770,10 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name) } else rc = cap_inode_removexattr(dentry, name); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); if (rc == 0) - rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); return rc; } @@ -855,12 +932,16 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int rc = 0; + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path(&ad, file->f_path); if (_IOC_DIR(cmd) & _IOC_WRITE) - rc = smk_curacc(file->f_security, MAY_WRITE); + rc = smk_curacc(file->f_security, MAY_WRITE, &ad); if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ)) - rc = smk_curacc(file->f_security, MAY_READ); + rc = smk_curacc(file->f_security, MAY_READ, &ad); return rc; } @@ -874,7 +955,11 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, */ static int smack_file_lock(struct file *file, unsigned int cmd) { - return smk_curacc(file->f_security, MAY_WRITE); + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, file->f_path.dentry); + return smk_curacc(file->f_security, MAY_WRITE, &ad); } /** @@ -888,8 +973,12 @@ static int smack_file_lock(struct file *file, unsigned int cmd) static int smack_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { + struct smk_audit_info ad; int rc; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path(&ad, file->f_path); + switch (cmd) { case F_DUPFD: case F_GETFD: @@ -897,7 +986,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, case F_GETLK: case F_GETOWN: case F_GETSIG: - rc = smk_curacc(file->f_security, MAY_READ); + rc = smk_curacc(file->f_security, MAY_READ, &ad); break; case F_SETFD: case F_SETFL: @@ -905,10 +994,10 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, case F_SETLKW: case F_SETOWN: case F_SETSIG: - rc = smk_curacc(file->f_security, MAY_WRITE); + rc = smk_curacc(file->f_security, MAY_WRITE, &ad); break; default: - rc = smk_curacc(file->f_security, MAY_READWRITE); + rc = smk_curacc(file->f_security, MAY_READWRITE, &ad); } return rc; @@ -943,14 +1032,21 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, { struct file *file; int rc; + char *tsp = tsk->cred->security; + struct smk_audit_info ad; /* * struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE); + /* we don't log here as rc can be overriden */ + rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL); if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) - return 0; + rc = 0; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, tsk); + smack_log(file->f_security, tsp, MAY_WRITE, rc, &ad); return rc; } @@ -963,7 +1059,10 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, static int smack_file_receive(struct file *file) { int may = 0; + struct smk_audit_info ad; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_fs_path(&ad, file->f_path); /* * This code relies on bitmasks. */ @@ -972,7 +1071,7 @@ static int smack_file_receive(struct file *file) if (file->f_mode & FMODE_WRITE) may |= MAY_WRITE; - return smk_curacc(file->f_security, may); + return smk_curacc(file->f_security, may, &ad); } /* @@ -1052,6 +1151,22 @@ static int smack_kernel_create_files_as(struct cred *new, } /** + * smk_curacc_on_task - helper to log task related access + * @p: the task object + * @access : the access requested + * + * Return 0 if access is permitted + */ +static int smk_curacc_on_task(struct task_struct *p, int access) +{ + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, p); + return smk_curacc(task_security(p), access, &ad); +} + +/** * smack_task_setpgid - Smack check on setting pgid * @p: the task object * @pgid: unused @@ -1060,7 +1175,7 @@ static int smack_kernel_create_files_as(struct cred *new, */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(task_security(p), MAY_WRITE); + return smk_curacc_on_task(p, MAY_WRITE); } /** @@ -1071,7 +1186,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1082,7 +1197,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1110,7 +1225,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(task_security(p), MAY_WRITE); + rc = smk_curacc_on_task(p, MAY_WRITE); return rc; } @@ -1127,7 +1242,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(task_security(p), MAY_WRITE); + rc = smk_curacc_on_task(p, MAY_WRITE); return rc; } @@ -1139,7 +1254,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1157,7 +1272,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(task_security(p), MAY_WRITE); + rc = smk_curacc_on_task(p, MAY_WRITE); return rc; } @@ -1169,7 +1284,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1180,7 +1295,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_WRITE); + return smk_curacc_on_task(p, MAY_WRITE); } /** @@ -1198,18 +1313,23 @@ static int smack_task_movememory(struct task_struct *p) static int smack_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid) { + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, p); /* * Sending a signal requires that the sender * can write the receiver. */ if (secid == 0) - return smk_curacc(task_security(p), MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE, &ad); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE); + return smk_access(smack_from_secid(secid), task_security(p), + MAY_WRITE, &ad); } /** @@ -1220,11 +1340,15 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, */ static int smack_task_wait(struct task_struct *p) { + struct smk_audit_info ad; + char *sp = current_security(); + char *tsp = task_security(p); int rc; - rc = smk_access(current_security(), task_security(p), MAY_WRITE); + /* we don't log here, we can be overriden */ + rc = smk_access(sp, tsp, MAY_WRITE, NULL); if (rc == 0) - return 0; + goto out_log; /* * Allow the operation to succeed if either task @@ -1238,8 +1362,12 @@ static int smack_task_wait(struct task_struct *p) * the smack value. */ if (capable(CAP_MAC_OVERRIDE) || has_capability(p, CAP_MAC_OVERRIDE)) - return 0; - + rc = 0; + /* we log only if we didn't get overriden */ + out_log: + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, p); + smack_log(sp, tsp, MAY_WRITE, rc, &ad); return rc; } @@ -1455,12 +1583,19 @@ static int smack_netlabel_send(struct sock *sk, struct sockaddr_in *sap) int sk_lbl; char *hostsp; struct socket_smack *ssp = sk->sk_security; + struct smk_audit_info ad; rcu_read_lock(); hostsp = smack_host_label(sap); if (hostsp != NULL) { sk_lbl = SMACK_UNLABELED_SOCKET; - rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + ad.a.u.net.family = sap->sin_family; + ad.a.u.net.dport = sap->sin_port; + ad.a.u.net.v4info.daddr = sap->sin_addr.s_addr; +#endif + rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE, &ad); } else { sk_lbl = SMACK_CIPSO_SOCKET; rc = 0; @@ -1656,6 +1791,25 @@ static void smack_shm_free_security(struct shmid_kernel *shp) } /** + * smk_curacc_shm : check if current has access on shm + * @shp : the object + * @access : access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smk_curacc_shm(struct shmid_kernel *shp, int access) +{ + char *ssp = smack_of_shm(shp); + struct smk_audit_info ad; + +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = shp->shm_perm.id; +#endif + return smk_curacc(ssp, access, &ad); +} + +/** * smack_shm_associate - Smack access check for shm * @shp: the object * @shmflg: access requested @@ -1664,11 +1818,10 @@ static void smack_shm_free_security(struct shmid_kernel *shp) */ static int smack_shm_associate(struct shmid_kernel *shp, int shmflg) { - char *ssp = smack_of_shm(shp); int may; may = smack_flags_to_may(shmflg); - return smk_curacc(ssp, may); + return smk_curacc_shm(shp, may); } /** @@ -1680,7 +1833,6 @@ static int smack_shm_associate(struct shmid_kernel *shp, int shmflg) */ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) { - char *ssp; int may; switch (cmd) { @@ -1703,9 +1855,7 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) default: return -EINVAL; } - - ssp = smack_of_shm(shp); - return smk_curacc(ssp, may); + return smk_curacc_shm(shp, may); } /** @@ -1719,11 +1869,10 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg) { - char *ssp = smack_of_shm(shp); int may; may = smack_flags_to_may(shmflg); - return smk_curacc(ssp, may); + return smk_curacc_shm(shp, may); } /** @@ -1765,6 +1914,25 @@ static void smack_sem_free_security(struct sem_array *sma) } /** + * smk_curacc_sem : check if current has access on sem + * @sma : the object + * @access : access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smk_curacc_sem(struct sem_array *sma, int access) +{ + char *ssp = smack_of_sem(sma); + struct smk_audit_info ad; + +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = sma->sem_perm.id; +#endif + return smk_curacc(ssp, access, &ad); +} + +/** * smack_sem_associate - Smack access check for sem * @sma: the object * @semflg: access requested @@ -1773,11 +1941,10 @@ static void smack_sem_free_security(struct sem_array *sma) */ static int smack_sem_associate(struct sem_array *sma, int semflg) { - char *ssp = smack_of_sem(sma); int may; may = smack_flags_to_may(semflg); - return smk_curacc(ssp, may); + return smk_curacc_sem(sma, may); } /** @@ -1789,7 +1956,6 @@ static int smack_sem_associate(struct sem_array *sma, int semflg) */ static int smack_sem_semctl(struct sem_array *sma, int cmd) { - char *ssp; int may; switch (cmd) { @@ -1818,8 +1984,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd) return -EINVAL; } - ssp = smack_of_sem(sma); - return smk_curacc(ssp, may); + return smk_curacc_sem(sma, may); } /** @@ -1836,9 +2001,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd) static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops, int alter) { - char *ssp = smack_of_sem(sma); - - return smk_curacc(ssp, MAY_READWRITE); + return smk_curacc_sem(sma, MAY_READWRITE); } /** @@ -1880,6 +2043,25 @@ static char *smack_of_msq(struct msg_queue *msq) } /** + * smk_curacc_msq : helper to check if current has access on msq + * @msq : the msq + * @access : access requested + * + * return 0 if current has access, error otherwise + */ +static int smk_curacc_msq(struct msg_queue *msq, int access) +{ + char *msp = smack_of_msq(msq); + struct smk_audit_info ad; + +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = msq->q_perm.id; +#endif + return smk_curacc(msp, access, &ad); +} + +/** * smack_msg_queue_associate - Smack access check for msg_queue * @msq: the object * @msqflg: access requested @@ -1888,11 +2070,10 @@ static char *smack_of_msq(struct msg_queue *msq) */ static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg) { - char *msp = smack_of_msq(msq); int may; may = smack_flags_to_may(msqflg); - return smk_curacc(msp, may); + return smk_curacc_msq(msq, may); } /** @@ -1904,7 +2085,6 @@ static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg) */ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) { - char *msp; int may; switch (cmd) { @@ -1926,8 +2106,7 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) return -EINVAL; } - msp = smack_of_msq(msq); - return smk_curacc(msp, may); + return smk_curacc_msq(msq, may); } /** @@ -1941,11 +2120,10 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg) { - char *msp = smack_of_msq(msq); - int rc; + int may; - rc = smack_flags_to_may(msqflg); - return smk_curacc(msp, rc); + may = smack_flags_to_may(msqflg); + return smk_curacc_msq(msq, may); } /** @@ -1961,9 +2139,7 @@ static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode) { - char *msp = smack_of_msq(msq); - - return smk_curacc(msp, MAY_READWRITE); + return smk_curacc_msq(msq, MAY_READWRITE); } /** @@ -1976,10 +2152,14 @@ static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag) { char *isp = ipp->security; - int may; + int may = smack_flags_to_may(flag); + struct smk_audit_info ad; - may = smack_flags_to_may(flag); - return smk_curacc(isp, may); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = ipp->id; +#endif + return smk_curacc(isp, may, &ad); } /** @@ -2238,8 +2418,12 @@ static int smack_unix_stream_connect(struct socket *sock, { struct inode *sp = SOCK_INODE(sock); struct inode *op = SOCK_INODE(other); + struct smk_audit_info ad; - return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_READWRITE); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + smk_ad_setfield_u_net_sk(&ad, other->sk); + return smk_access(smk_of_inode(sp), smk_of_inode(op), + MAY_READWRITE, &ad); } /** @@ -2254,8 +2438,11 @@ static int smack_unix_may_send(struct socket *sock, struct socket *other) { struct inode *sp = SOCK_INODE(sock); struct inode *op = SOCK_INODE(other); + struct smk_audit_info ad; - return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + smk_ad_setfield_u_net_sk(&ad, other->sk); + return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE, &ad); } /** @@ -2370,7 +2557,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) char smack[SMK_LABELLEN]; char *csp; int rc; - + struct smk_audit_info ad; if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) return 0; @@ -2388,13 +2575,19 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) netlbl_secattr_destroy(&secattr); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + ad.a.u.net.family = sk->sk_family; + ad.a.u.net.netif = skb->iif; + ipv4_skb_to_auditdata(skb, &ad.a, NULL); +#endif /* * Receiving a packet requires that the other end * be able to write here. Read access is not required. * This is the simplist possible security model * for networking. */ - rc = smk_access(csp, ssp->smk_in, MAY_WRITE); + rc = smk_access(csp, ssp->smk_in, MAY_WRITE, &ad); if (rc != 0) netlbl_skbuff_err(skb, rc, 0); return rc; @@ -2523,6 +2716,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct iphdr *hdr; char smack[SMK_LABELLEN]; int rc; + struct smk_audit_info ad; /* handle mapped IPv4 packets arriving via IPv6 sockets */ if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) @@ -2536,11 +2730,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN); netlbl_secattr_destroy(&secattr); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + ad.a.u.net.family = family; + ad.a.u.net.netif = skb->iif; + ipv4_skb_to_auditdata(skb, &ad.a, NULL); +#endif /* * Receiving a packet requires that the other end be able to write * here. Read access is not required. */ - rc = smk_access(smack, ssp->smk_in, MAY_WRITE); + rc = smk_access(smack, ssp->smk_in, MAY_WRITE, &ad); if (rc != 0) return rc; @@ -2642,6 +2842,7 @@ static int smack_key_permission(key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { struct key *keyp; + struct smk_audit_info ad; keyp = key_ref_to_ptr(key_ref); if (keyp == NULL) @@ -2657,8 +2858,13 @@ static int smack_key_permission(key_ref_t key_ref, */ if (cred->security == NULL) return -EACCES; - - return smk_access(cred->security, keyp->security, MAY_READWRITE); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY); + ad.a.u.key_struct.key = keyp->serial; + ad.a.u.key_struct.key_desc = keyp->description; +#endif + return smk_access(cred->security, keyp->security, + MAY_READWRITE, &ad); } #endif /* CONFIG_KEYS */ diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index e03a7e1..904af34 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -41,6 +41,7 @@ enum smk_inos { SMK_AMBIENT = 7, /* internet ambient label */ SMK_NETLBLADDR = 8, /* single label hosts */ SMK_ONLYCAP = 9, /* the only "capable" label */ + SMK_LOGGING = 10, /* logging */ }; /* @@ -1192,6 +1193,69 @@ static const struct file_operations smk_onlycap_ops = { }; /** + * smk_read_logging - read() for /smack/logging + * @filp: file pointer, not actually used + * @buf: where to put the result + * @cn: maximum to send along + * @ppos: where to start + * + * Returns number of bytes read or error code, as appropriate + */ +static ssize_t smk_read_logging(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[32]; + ssize_t rc; + + if (*ppos != 0) + return 0; + + sprintf(temp, "%d\n", log_policy); + rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp)); + return rc; +} + +/** + * smk_write_logging - write() for /smack/logging + * @file: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_logging(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[32]; + int i; + + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + + if (count >= sizeof(temp) || count == 0) + return -EINVAL; + + if (copy_from_user(temp, buf, count) != 0) + return -EFAULT; + + temp[count] = '\0'; + + if (sscanf(temp, "%d", &i) != 1) + return -EINVAL; + if (i < 0 || i > 3) + return -EINVAL; + log_policy = i; + return count; +} + + + +static const struct file_operations smk_logging_ops = { + .read = smk_read_logging, + .write = smk_write_logging, +}; +/** * smk_fill_super - fill the /smackfs superblock * @sb: the empty superblock * @data: unused @@ -1221,6 +1285,8 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) {"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR}, [SMK_ONLYCAP] = {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR}, + [SMK_LOGGING] = + {"logging", &smk_logging_ops, S_IRUGO|S_IWUSR}, /* last one */ {""} }; -- cgit v0.10.2 From 7ba5c840e64d4a967379f1ae3eca73278180b11d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Apr 2009 21:31:17 -0700 Subject: rcu: Add __rcu_pending tracing to hierarchical RCU Add tracing to __rcu_pending() to provide information on why RCU processing was kicked off. This is helpful for debugging hierarchical RCU, and might also be helpful in learning how hierarchical RCU operates. Located-by: Anton Blanchard Tested-by: Anton Blanchard Signed-off-by: Paul E. McKenney Cc: anton@samba.org Cc: akpm@linux-foundation.org Cc: dipankar@in.ibm.com Cc: manfred@colorfullife.com Cc: cl@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: schamp@sgi.com Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: ego@in.ibm.com Cc: laijs@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: peterz@infradead.org Cc: penberg@cs.helsinki.fi Cc: andi@firstfloor.org Cc: "Paul E. McKenney" LKML-Reference: <1239683479943-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 58b2aa5..5a51538 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -161,8 +161,15 @@ struct rcu_data { unsigned long offline_fqs; /* Kicked due to being offline. */ unsigned long resched_ipi; /* Sent a resched IPI. */ - /* 5) For future __rcu_pending statistics. */ + /* 5) __rcu_pending() statistics. */ long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; int cpu; }; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d2a372f..0dccfbb 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending) + if (rdp->qs_pending) { + rdp->n_rp_qs_pending++; return 1; + } /* Does this CPU have callbacks ready to invoke? */ - if (cpu_has_callbacks_ready_to_invoke(rdp)) + if (cpu_has_callbacks_ready_to_invoke(rdp)) { + rdp->n_rp_cb_ready++; return 1; + } /* Has RCU gone idle with this CPU needing another grace period? */ - if (cpu_needs_another_gp(rsp, rdp)) + if (cpu_needs_another_gp(rsp, rdp)) { + rdp->n_rp_cpu_needs_gp++; return 1; + } /* Has another RCU grace period completed? */ - if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */ + if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */ + rdp->n_rp_gp_completed++; return 1; + } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */ + if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */ + rdp->n_rp_gp_started++; return 1; + } /* Has an RCU GP gone long enough to send resched IPIs &c? */ if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && - ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { + rdp->n_rp_need_fqs++; return 1; + } /* nothing to do */ + rdp->n_rp_need_nothing++; return 0; } diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4b1875b..fe1dcdb 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = { .release = single_release, }; -static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir; +static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) +{ + seq_printf(m, "%3d%cnp=%ld " + "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? '!' : ' ', + rdp->n_rcu_pending, + rdp->n_rp_qs_pending, + rdp->n_rp_cb_ready, + rdp->n_rp_cpu_needs_gp, + rdp->n_rp_gp_completed, + rdp->n_rp_gp_started, + rdp->n_rp_need_fqs, + rdp->n_rp_need_nothing); +} + +static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) +{ + int cpu; + struct rcu_data *rdp; + + for_each_possible_cpu(cpu) { + rdp = rsp->rda[cpu]; + if (rdp->beenonline) + print_one_rcu_pending(m, rdp); + } +} + +static int show_rcu_pending(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + print_rcu_pendings(m, &rcu_state); + seq_puts(m, "rcu_bh:\n"); + print_rcu_pendings(m, &rcu_bh_state); + return 0; +} + +static int rcu_pending_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcu_pending, NULL); +} + +static struct file_operations rcu_pending_fops = { + .owner = THIS_MODULE, + .open = rcu_pending_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir; +static struct dentry *datadir; +static struct dentry *datadir_csv; +static struct dentry *gpdir; +static struct dentry *hierdir; +static struct dentry *rcu_pendingdir; + static int __init rcuclassic_trace_init(void) { rcudir = debugfs_create_dir("rcu", NULL); @@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void) NULL, &rcuhier_fops); if (!hierdir) goto free_out; + + rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir, + NULL, &rcu_pending_fops); + if (!rcu_pendingdir) + goto free_out; return 0; free_out: if (datadir) @@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void) debugfs_remove(datadir_csv); debugfs_remove(gpdir); debugfs_remove(hierdir); + debugfs_remove(rcu_pendingdir); debugfs_remove(rcudir); } -- cgit v0.10.2 From 6fd9b3a40b82081d9e6490b0d7cd656e9a78a134 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Apr 2009 21:31:18 -0700 Subject: rcu: Update RCU tracing documentation for __rcu_pending This patch updates the RCU documentation to reflect the changes in tracing made in the previous patch in the set. Located-by: Anton Blanchard Tested-by: Anton Blanchard Signed-off-by: Paul E. McKenney Cc: anton@samba.org Cc: akpm@linux-foundation.org Cc: dipankar@in.ibm.com Cc: manfred@colorfullife.com Cc: cl@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: schamp@sgi.com Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: ego@in.ibm.com Cc: laijs@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: peterz@infradead.org Cc: penberg@cs.helsinki.fi Cc: andi@firstfloor.org Cc: "Paul E. McKenney" LKML-Reference: <12396834792865-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 0688482..02cced1 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy). The output of "cat rcu/rcudata" looks as follows: rcu: - 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 - 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 - 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 - 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 - 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 - 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 - 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 - 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 +rcu: + 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10 + 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10 + 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10 + 3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10 + 4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10 + 5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10 + 6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10 + 7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10 rcu_bh: - 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 - 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 - 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 - 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 - 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 - 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 - 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 - 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 + 0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10 + 1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10 + 2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10 + 4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 + 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 The first section lists the rcu_data structures for rcu, the second for rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. @@ -253,12 +254,6 @@ o "pqc" indicates which grace period the last-observed quiescent o "qp" indicates that RCU still expects a quiescent state from this CPU. -o "rpfq" is the number of rcu_pending() calls on this CPU required - to induce this CPU to invoke force_quiescent_state(). - -o "rp" is low-order four hex digits of the count of how many times - rcu_pending() has been invoked on this CPU. - o "dt" is the current value of the dyntick counter that is incremented when entering or leaving dynticks idle state, either by the scheduler or by irq. The number after the "/" is the interrupt @@ -305,6 +300,9 @@ o "b" is the batch limit for this CPU. If more than this number of RCU callbacks is ready to invoke, then the remainder will be deferred. +There is also an rcu/rcudata.csv file with the same information in +comma-separated-variable spreadsheet format. + The output of "cat rcu/rcugp" looks as follows: @@ -411,3 +409,63 @@ o Each element of the form "1/1 0:127 ^0" represents one struct For example, the first entry at the lowest level shows "^0", indicating that it corresponds to bit zero in the first entry at the middle level. + + +The output of "cat rcu/rcu_pending" looks as follows: + +rcu: + 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 + 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 + 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 + 3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 + 4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 + 5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 + 6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 + 7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 +rcu_bh: + 0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 + 1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 + 2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 + 3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 + 4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 + 5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 + 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 + 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 + +As always, this is once again split into "rcu" and "rcu_bh" portions. +The fields are as follows: + +o "np" is the number of times that __rcu_pending() has been invoked + for the corresponding flavor of RCU. + +o "qsp" is the number of times that the RCU was waiting for a + quiescent state from this CPU. + +o "cbr" is the number of times that this CPU had RCU callbacks + that had passed through a grace period, and were thus ready + to be invoked. + +o "cng" is the number of times that this CPU needed another + grace period while RCU was idle. + +o "gpc" is the number of times that an old grace period had + completed, but this CPU was not yet aware of it. + +o "gps" is the number of times that a new grace period had started, + but this CPU was not yet aware of it. + +o "nf" is the number of times that this CPU suspected that the + current grace period had run for too long, and thus needed to + be forced. + + Please note that "forcing" consists of sending resched IPIs + to holdout CPUs. If that CPU really still is in an old RCU + read-side critical section, then we really do have to wait for it. + The assumption behing "forcing" is that the CPU is not still in + an old RCU read-side critical section, but has not yet responded + for some other reason. + +o "nn" is the number of times that this CPU needed nothing. Alert + readers will note that the rcu "nn" number for a given CPU very + closely matches the rcu_bh "np" number for that same CPU. This + is due to short-circuit evaluation in rcu_pending(). -- cgit v0.10.2 From 66aa230e437d89ca56224135f617e2d8e391a3ef Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 14 Apr 2009 12:54:29 +0530 Subject: x86: page_types.h unification of declarations Impact: unification of declarations, cleanup Unification of declarations: moved init_memory_mapping, initmem_init and free_initmem from page_XX_types.h to page_types.h Signed-off-by: Jaswinder Singh Rajput Acked-by: Pekka Enberg Cc: Andrew Morton LKML-Reference: <1239693869.3033.31.camel@ht.satnam> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0f915ae..6f1b733 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h @@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; extern void find_low_pfn_range(void); -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); -extern void initmem_init(unsigned long, unsigned long); -extern void free_initmem(void); extern void setup_bootmem_allocator(void); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d38c91b..3f58718 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -71,12 +71,6 @@ extern unsigned long __phys_addr(unsigned long); #define vmemmap ((struct page *)VMEMMAP_START) -extern unsigned long init_memory_mapping(unsigned long start, - unsigned long end); - -extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); -extern void free_initmem(void); - extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 826ad37..6473f5c 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; +extern unsigned long init_memory_mapping(unsigned long start, + unsigned long end); + +extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void free_initmem(void); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PAGE_DEFS_H */ -- cgit v0.10.2 From e7d43a74cb07cbc4b8e9b5e4a914816b33fb0719 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 14 Apr 2009 13:18:28 +0530 Subject: x86: avoid multiple declaration of kstack_depth_to_print Impact: cleanup asm/stacktrace.h is more appropriate so removing other 2 declarations. Signed-off-by: Jaswinder Singh Rajput Cc: Neil Horman LKML-Reference: <1239695308.3033.34.camel@ht.satnam> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d53425..9aa3ab2 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -74,7 +74,6 @@ static inline int get_si_code(unsigned long condition) } extern int panic_on_unrecovered_nmi; -extern int kstack_depth_to_print; void math_error(void __user *); void math_emulate(struct math_emu_info *); diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index da87590b..81086c2 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -29,7 +29,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl); extern unsigned int code_bytes; -extern int kstack_depth_to_print; /* The form of the top of the frame on the stack */ struct stack_frame { -- cgit v0.10.2 From f711f6090a81cbd396b63de90f415d33f563af9b Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Tue, 14 Apr 2009 10:25:30 +0530 Subject: sched: Nominate idle load balancer from a semi-idle package. Currently the nomination of idle-load balancer is done by choosing the first idle cpu in the nohz.cpu_mask. This may not be power-efficient, since such an idle cpu could come from a completely idle core/package thereby preventing the whole core/package from being in a low-power state. For eg, consider a quad-core dual package system. The cpu numbering need not be sequential and can something like [0, 2, 4, 6] and [1, 3, 5, 7]. With sched_mc/smt_power_savings and the power-aware IRQ balance, we try to keep as fewer Packages/Cores active. But the current idle load balancer logic goes against this by choosing the first_cpu in the nohz.cpu_mask and not taking the system topology into consideration. Improve the algorithm to nominate the idle load balancer from a semi idle cores/packages thereby increasing the probability of the cores/packages being in deeper sleep states for longer duration. The algorithm is activated only when sched_mc/smt_power_savings != 0. Signed-off-by: Gautham R Shenoy Acked-by: Peter Zijlstra LKML-Reference: <20090414045530.7645.12175.stgit@sofia.in.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 5724508..b0fefa3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4240,10 +4240,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) static struct { atomic_t load_balancer; cpumask_var_t cpu_mask; + cpumask_var_t ilb_grp_nohz_mask; } nohz ____cacheline_aligned = { .load_balancer = ATOMIC_INIT(-1), }; +#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) +/** + * lowest_flag_domain - Return lowest sched_domain containing flag. + * @cpu: The cpu whose lowest level of sched domain is to + * be returned. + * @flag: The flag to check for the lowest sched_domain + * for the given cpu. + * + * Returns the lowest sched_domain of a cpu which contains the given flag. + */ +static inline struct sched_domain *lowest_flag_domain(int cpu, int flag) +{ + struct sched_domain *sd; + + for_each_domain(cpu, sd) + if (sd && (sd->flags & flag)) + break; + + return sd; +} + +/** + * for_each_flag_domain - Iterates over sched_domains containing the flag. + * @cpu: The cpu whose domains we're iterating over. + * @sd: variable holding the value of the power_savings_sd + * for cpu. + * @flag: The flag to filter the sched_domains to be iterated. + * + * Iterates over all the scheduler domains for a given cpu that has the 'flag' + * set, starting from the lowest sched_domain to the highest. + */ +#define for_each_flag_domain(cpu, sd, flag) \ + for (sd = lowest_flag_domain(cpu, flag); \ + (sd && (sd->flags & flag)); sd = sd->parent) + +/** + * is_semi_idle_group - Checks if the given sched_group is semi-idle. + * @ilb_group: group to be checked for semi-idleness + * + * Returns: 1 if the group is semi-idle. 0 otherwise. + * + * We define a sched_group to be semi idle if it has atleast one idle-CPU + * and atleast one non-idle CPU. This helper function checks if the given + * sched_group is semi-idle or not. + */ +static inline int is_semi_idle_group(struct sched_group *ilb_group) +{ + cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask, + sched_group_cpus(ilb_group)); + + /* + * A sched_group is semi-idle when it has atleast one busy cpu + * and atleast one idle cpu. + */ + if (cpumask_empty(nohz.ilb_grp_nohz_mask)) + return 0; + + if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group))) + return 0; + + return 1; +} +/** + * find_new_ilb - Finds the optimum idle load balancer for nomination. + * @cpu: The cpu which is nominating a new idle_load_balancer. + * + * Returns: Returns the id of the idle load balancer if it exists, + * Else, returns >= nr_cpu_ids. + * + * This algorithm picks the idle load balancer such that it belongs to a + * semi-idle powersavings sched_domain. The idea is to try and avoid + * completely idle packages/cores just for the purpose of idle load balancing + * when there are other idle cpu's which are better suited for that job. + */ +static int find_new_ilb(int cpu) +{ + struct sched_domain *sd; + struct sched_group *ilb_group; + + /* + * Have idle load balancer selection from semi-idle packages only + * when power-aware load balancing is enabled + */ + if (!(sched_smt_power_savings || sched_mc_power_savings)) + goto out_done; + + /* + * Optimize for the case when we have no idle CPUs or only one + * idle CPU. Don't walk the sched_domain hierarchy in such cases + */ + if (cpumask_weight(nohz.cpu_mask) < 2) + goto out_done; + + for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) { + ilb_group = sd->groups; + + do { + if (is_semi_idle_group(ilb_group)) + return cpumask_first(nohz.ilb_grp_nohz_mask); + + ilb_group = ilb_group->next; + + } while (ilb_group != sd->groups); + } + +out_done: + return cpumask_first(nohz.cpu_mask); +} +#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ +static inline int find_new_ilb(int call_cpu) +{ + return first_cpu(nohz.cpu_mask); +} +#endif + /* * This routine will try to nominate the ilb (idle load balancing) * owner among the cpus whose ticks are stopped. ilb owner will do the idle @@ -4468,15 +4584,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu) } if (atomic_read(&nohz.load_balancer) == -1) { - /* - * simple selection for now: Nominate the - * first cpu in the nohz list to be the next - * ilb owner. - * - * TBD: Traverse the sched domains and nominate - * the nearest cpu in the nohz.cpu_mask. - */ - int ilb = cpumask_first(nohz.cpu_mask); + int ilb = find_new_ilb(cpu); if (ilb < nr_cpu_ids) resched_cpu(ilb); @@ -9051,6 +9159,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ alloc_bootmem_cpumask_var(&nohz.cpu_mask); + alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask); #endif alloc_bootmem_cpumask_var(&cpu_isolated_map); #endif /* SMP */ -- cgit v0.10.2 From e790fb0ba64bfec158e1219d899cb588275d12ab Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Tue, 14 Apr 2009 10:25:35 +0530 Subject: sched: Nominate a power-efficient ilb in select_nohz_balancer() The CPU that first goes idle becomes the idle-load-balancer and remains that until either it picks up a task or till all the CPUs of the system goes idle. Optimize this further to allow it to relinquish it's post once all it's siblings in the power-aware sched_domain go idle, thereby allowing the whole package-core to go idle. While relinquising the post, nominate another an idle-load balancer from a semi-idle core/package. Signed-off-by: Gautham R Shenoy Acked-by: Peter Zijlstra LKML-Reference: <20090414045535.7645.31641.stgit@sofia.in.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index b0fefa3..36d213b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4414,8 +4414,24 @@ int select_nohz_load_balancer(int stop_tick) /* make me the ilb owner */ if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1) return 1; - } else if (atomic_read(&nohz.load_balancer) == cpu) + } else if (atomic_read(&nohz.load_balancer) == cpu) { + int new_ilb; + + if (!(sched_smt_power_savings || + sched_mc_power_savings)) + return 1; + /* + * Check to see if there is a more power-efficient + * ilb. + */ + new_ilb = find_new_ilb(cpu); + if (new_ilb < nr_cpu_ids && new_ilb != cpu) { + atomic_set(&nohz.load_balancer, -1); + resched_cpu(new_ilb); + return 0; + } return 1; + } } else { if (!cpumask_test_cpu(cpu, nohz.cpu_mask)) return 0; -- cgit v0.10.2 From 6424fb38667fffbbb1b90be0ffd9a0c540db6a4b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 13 Apr 2009 23:51:46 -0700 Subject: x86: remove (null) in /sys kernel_page_tables Impact: cleanup %p prints out 0x000000000000000 as (null) so use %lx instead. Signed-off-by: Yinghai Lu LKML-Reference: <49E43282.1090607@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e7277cb..a725b7f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_address >= st->marker[1].start_address) { const char *unit = units; unsigned long delta; + int width = sizeof(unsigned long) * 2; /* * Now print the actual finished series */ - seq_printf(m, "0x%p-0x%p ", - (void *)st->start_address, - (void *)st->current_address); + seq_printf(m, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); delta = (st->current_address - st->start_address) >> 10; while (!(delta & 1023) && unit[1]) { -- cgit v0.10.2 From 02bec490450836ebbd628e97ec03f10b57def8ce Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Tue, 24 Mar 2009 12:24:35 +0100 Subject: ALSA: lx6464es - driver for the digigram lx6464es interface prototype of a driver for the digigram lx6464es 64 channel ethersound interface. Signed-off-by: Tim Blechmann Signed-off-by: Takashi Iwai diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ee98cd5..2b1a695 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1005,6 +1005,7 @@ #define PCI_DEVICE_ID_PLX_PCI200SYN 0x3196 #define PCI_DEVICE_ID_PLX_9030 0x9030 #define PCI_DEVICE_ID_PLX_9050 0x9050 +#define PCI_DEVICE_ID_PLX_9056 0x9056 #define PCI_DEVICE_ID_PLX_9080 0x9080 #define PCI_DEVICE_ID_PLX_GTEK_SERIAL2 0xa001 @@ -1847,6 +1848,10 @@ #define PCI_SUBDEVICE_ID_HYPERCOPE_METRO 0x0107 #define PCI_SUBDEVICE_ID_HYPERCOPE_CHAMP2 0x0108 +#define PCI_VENDOR_ID_DIGIGRAM 0x1369 +#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM 0xc001 +#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM 0xc002 + #define PCI_VENDOR_ID_KAWASAKI 0x136b #define PCI_DEVICE_ID_MCHIP_KL5A72002 0xff01 diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 93422e3..e912b70 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -622,6 +622,16 @@ config SND_KORG1212 To compile this driver as a module, choose M here: the module will be called snd-korg1212. +config SND_LX6464ES + tristate "Digigram LX6464ES" + select SND_PCM + help + Say Y here to include support for Digigram LX6464ES boards. + + To compile this driver as a module, choose M here: the module + will be called snd-lx6464es. + + config SND_MAESTRO3 tristate "ESS Allegro/Maestro3" select SND_AC97_CODEC diff --git a/sound/pci/Makefile b/sound/pci/Makefile index 65b25d2..7d83e08 100644 --- a/sound/pci/Makefile +++ b/sound/pci/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_SND) += \ ca0106/ \ cs46xx/ \ cs5535audio/ \ + lx6464es/ \ echoaudio/ \ emu10k1/ \ hda/ \ diff --git a/sound/pci/lx6464es/Makefile b/sound/pci/lx6464es/Makefile new file mode 100644 index 0000000..eb04a6c --- /dev/null +++ b/sound/pci/lx6464es/Makefile @@ -0,0 +1,2 @@ +snd-lx6464es-objs := lx6464es.o lx_core.o +obj-$(CONFIG_SND_LX6464ES) += snd-lx6464es.o diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c new file mode 100644 index 0000000..7bc8b8c --- /dev/null +++ b/sound/pci/lx6464es/lx6464es.c @@ -0,0 +1,1152 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * + * Copyright (c) 2008, 2009 Tim Blechmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "lx6464es.h" + +MODULE_AUTHOR("Tim Blechmann"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("digigram lx6464es"); +MODULE_SUPPORTED_DEVICE("{digigram lx6464es{}}"); + + +static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; +static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; +static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; + +static const char card_name[] = "LX6464ES"; + + +#define PCI_DEVICE_ID_PLX_LX6464ES PCI_DEVICE_ID_PLX_9056 + +static struct pci_device_id snd_lx6464es_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES), + .subvendor = PCI_VENDOR_ID_DIGIGRAM, + .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM + }, /* LX6464ES */ + { PCI_DEVICE(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES), + .subvendor = PCI_VENDOR_ID_DIGIGRAM, + .subdevice = PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM + }, /* LX6464ES-CAE */ + { 0, }, +}; + +MODULE_DEVICE_TABLE(pci, snd_lx6464es_ids); + + + +/* PGO pour USERo dans le registre pci_0x06/loc_0xEC */ +#define CHIPSC_RESET_XILINX (1L<<16) + + +/* alsa callbacks */ +static struct snd_pcm_hardware lx_caps = { + .info = (SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_SYNC_START), + .formats = (SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_S16_BE | + SNDRV_PCM_FMTBIT_S24_3LE | + SNDRV_PCM_FMTBIT_S24_3BE), + .rates = (SNDRV_PCM_RATE_CONTINUOUS | + SNDRV_PCM_RATE_8000_192000), + .rate_min = 8000, + .rate_max = 192000, + .channels_min = 2, + .channels_max = 64, + .buffer_bytes_max = 64*2*3*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER, + .period_bytes_min = (2*2*MICROBLAZE_IBL_MIN*2), + .period_bytes_max = (4*64*MICROBLAZE_IBL_MAX*MAX_STREAM_BUFFER), + .periods_min = 2, + .periods_max = MAX_STREAM_BUFFER, +}; + +static int lx_set_granularity(struct lx6464es *chip, u32 gran); + + +static int lx_hardware_open(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + struct snd_pcm_runtime *runtime = substream->runtime; + int channels = runtime->channels; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_pcm_uframes_t period_size = runtime->period_size; + + snd_printd(LXP "allocating pipe for %d channels\n", channels); + err = lx_pipe_allocate(chip, 0, is_capture, channels); + if (err < 0) { + snd_printk(KERN_ERR LXP "allocating pipe failed\n"); + return err; + } + + err = lx_set_granularity(chip, period_size); + if (err < 0) { + snd_printk(KERN_ERR LXP "setting granularity to %ld failed\n", + period_size); + return err; + } + + return 0; +} + +static int lx_hardware_start(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + struct snd_pcm_runtime *runtime = substream->runtime; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printd(LXP "setting stream format\n"); + err = lx_stream_set_format(chip, runtime, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "setting stream format failed\n"); + return err; + } + + snd_printd(LXP "starting pipe\n"); + err = lx_pipe_start(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "starting pipe failed\n"); + return err; + } + + snd_printd(LXP "waiting for pipe to start\n"); + err = lx_pipe_wait_for_start(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "waiting for pipe failed\n"); + return err; + } + + return err; +} + + +static int lx_hardware_stop(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printd(LXP "pausing pipe\n"); + err = lx_pipe_pause(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "pausing pipe failed\n"); + return err; + } + + snd_printd(LXP "waiting for pipe to become idle\n"); + err = lx_pipe_wait_for_idle(chip, 0, is_capture); + if (err < 0) { + snd_printk(KERN_ERR LXP "waiting for pipe failed\n"); + return err; + } + + snd_printd(LXP "stopping pipe\n"); + err = lx_pipe_stop(chip, 0, is_capture); + if (err < 0) { + snd_printk(LXP "stopping pipe failed\n"); + return err; + } + + return err; +} + + +static int lx_hardware_close(struct lx6464es *chip, + struct snd_pcm_substream *substream) +{ + int err = 0; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printd(LXP "releasing pipe\n"); + err = lx_pipe_release(chip, 0, is_capture); + if (err < 0) { + snd_printk(LXP "releasing pipe failed\n"); + return err; + } + + return err; +} + + +static int lx_pcm_open(struct snd_pcm_substream *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + struct snd_pcm_runtime *runtime = substream->runtime; + int err = 0; + int board_rate; + + snd_printdd("->lx_pcm_open\n"); + mutex_lock(&chip->setup_mutex); + + /* copy the struct snd_pcm_hardware struct */ + runtime->hw = lx_caps; + +#if 0 + /* buffer-size should better be multiple of period-size */ + err = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (err < 0) { + snd_printk(KERN_WARNING LXP "could not constrain periods\n"); + goto exit; + } +#endif + + /* the clock rate cannot be changed */ + board_rate = chip->board_sample_rate; + err = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_RATE, + board_rate, board_rate); + + if (err < 0) { + snd_printk(KERN_WARNING LXP "could not constrain periods\n"); + goto exit; + } + + /* constrain period size */ + err = snd_pcm_hw_constraint_minmax(runtime, + SNDRV_PCM_HW_PARAM_PERIOD_SIZE, + MICROBLAZE_IBL_MIN, + MICROBLAZE_IBL_MAX); + if (err < 0) { + snd_printk(KERN_WARNING LXP + "could not constrain period size\n"); + goto exit; + } + + snd_pcm_hw_constraint_step(runtime, 0, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, 32); + + snd_pcm_set_sync(substream); + err = 0; + +exit: + runtime->private_data = chip; + + mutex_unlock(&chip->setup_mutex); + snd_printdd("<-lx_pcm_open, %d\n", err); + return err; +} + +static int lx_pcm_close(struct snd_pcm_substream *substream) +{ + int err = 0; + snd_printdd("->lx_pcm_close\n"); + return err; +} + +static snd_pcm_uframes_t lx_pcm_stream_pointer(struct snd_pcm_substream + *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + snd_pcm_uframes_t pos; + unsigned long flags; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + struct lx_stream *lx_stream = is_capture ? &chip->capture_stream : + &chip->playback_stream; + + snd_printdd("->lx_pcm_stream_pointer\n"); + + spin_lock_irqsave(&chip->lock, flags); + pos = lx_stream->frame_pos * substream->runtime->period_size; + spin_unlock_irqrestore(&chip->lock, flags); + + snd_printdd(LXP "stream_pointer at %ld\n", pos); + return pos; +} + +static int lx_pcm_prepare(struct snd_pcm_substream *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + int err = 0; + const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printdd("->lx_pcm_prepare\n"); + + mutex_lock(&chip->setup_mutex); + + if (chip->hardware_running[is_capture]) { + err = lx_hardware_stop(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to stop hardware. " + "Error code %d\n", err); + goto exit; + } + + err = lx_hardware_close(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to close hardware. " + "Error code %d\n", err); + goto exit; + } + } + + snd_printd(LXP "opening hardware\n"); + err = lx_hardware_open(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to open hardware. " + "Error code %d\n", err); + goto exit; + } + + err = lx_hardware_start(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to start hardware. " + "Error code %d\n", err); + goto exit; + } + + chip->hardware_running[is_capture] = 1; + + if (chip->board_sample_rate != substream->runtime->rate) { + if (!err) + chip->board_sample_rate = substream->runtime->rate; + } + +exit: + mutex_unlock(&chip->setup_mutex); + return err; +} + +static int lx_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params, int is_capture) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + int err = 0; + + snd_printdd("->lx_pcm_hw_params\n"); + + mutex_lock(&chip->setup_mutex); + + /* set dma buffer */ + err = snd_pcm_lib_malloc_pages(substream, + params_buffer_bytes(hw_params)); + + if (is_capture) + chip->capture_stream.stream = substream; + else + chip->playback_stream.stream = substream; + + mutex_unlock(&chip->setup_mutex); + return err; +} + +static int lx_pcm_hw_params_playback(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + return lx_pcm_hw_params(substream, hw_params, 0); +} + +static int lx_pcm_hw_params_capture(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *hw_params) +{ + return lx_pcm_hw_params(substream, hw_params, 1); +} + +static int lx_pcm_hw_free(struct snd_pcm_substream *substream) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + int err = 0; + int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + + snd_printdd("->lx_pcm_hw_free\n"); + mutex_lock(&chip->setup_mutex); + + if (chip->hardware_running[is_capture]) { + err = lx_hardware_stop(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to stop hardware. " + "Error code %d\n", err); + goto exit; + } + + err = lx_hardware_close(chip, substream); + if (err < 0) { + snd_printk(KERN_ERR LXP "failed to close hardware. " + "Error code %d\n", err); + goto exit; + } + + chip->hardware_running[is_capture] = 0; + } + + err = snd_pcm_lib_free_pages(substream); + + if (is_capture) + chip->capture_stream.stream = 0; + else + chip->playback_stream.stream = 0; + +exit: + mutex_unlock(&chip->setup_mutex); + return err; +} + +static void lx_trigger_start(struct lx6464es *chip, struct lx_stream *lx_stream) +{ + struct snd_pcm_substream *substream = lx_stream->stream; + const int is_capture = lx_stream->is_capture; + + int err; + + const u32 channels = substream->runtime->channels; + const u32 bytes_per_frame = channels * 3; + const u32 period_size = substream->runtime->period_size; + const u32 periods = substream->runtime->periods; + const u32 period_bytes = period_size * bytes_per_frame; + + dma_addr_t buf = substream->dma_buffer.addr; + int i; + + u32 needed, freed; + u32 size_array[5]; + + for (i = 0; i != periods; ++i) { + u32 buffer_index = 0; + + err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, + size_array); + snd_printdd(LXP "starting: needed %d, freed %d\n", + needed, freed); + + err = lx_buffer_give(chip, 0, is_capture, period_bytes, + lower_32_bits(buf), upper_32_bits(buf), + &buffer_index); + + snd_printdd(LXP "starting: buffer index %x on %p (%d bytes)\n", + buffer_index, (void *)buf, period_bytes); + buf += period_bytes; + } + + err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array); + snd_printdd(LXP "starting: needed %d, freed %d\n", needed, freed); + + snd_printd(LXP "starting: starting stream\n"); + err = lx_stream_start(chip, 0, is_capture); + if (err < 0) + snd_printk(KERN_ERR LXP "couldn't start stream\n"); + else + lx_stream->status = LX_STREAM_STATUS_RUNNING; + + lx_stream->frame_pos = 0; +} + +static void lx_trigger_stop(struct lx6464es *chip, struct lx_stream *lx_stream) +{ + const int is_capture = lx_stream->is_capture; + int err; + + snd_printd(LXP "stopping: stopping stream\n"); + err = lx_stream_stop(chip, 0, is_capture); + if (err < 0) + snd_printk(KERN_ERR LXP "couldn't stop stream\n"); + else + lx_stream->status = LX_STREAM_STATUS_FREE; + +} + +static void lx_trigger_tasklet_dispatch_stream(struct lx6464es *chip, + struct lx_stream *lx_stream) +{ + switch (lx_stream->status) { + case LX_STREAM_STATUS_SCHEDULE_RUN: + lx_trigger_start(chip, lx_stream); + break; + + case LX_STREAM_STATUS_SCHEDULE_STOP: + lx_trigger_stop(chip, lx_stream); + break; + + default: + break; + } +} + +static void lx_trigger_tasklet(unsigned long data) +{ + struct lx6464es *chip = (struct lx6464es *)data; + unsigned long flags; + + snd_printdd("->lx_trigger_tasklet\n"); + + spin_lock_irqsave(&chip->lock, flags); + lx_trigger_tasklet_dispatch_stream(chip, &chip->capture_stream); + lx_trigger_tasklet_dispatch_stream(chip, &chip->playback_stream); + spin_unlock_irqrestore(&chip->lock, flags); +} + +static int lx_pcm_trigger_dispatch(struct lx6464es *chip, + struct lx_stream *lx_stream, int cmd) +{ + int err = 0; + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + lx_stream->status = LX_STREAM_STATUS_SCHEDULE_RUN; + break; + + case SNDRV_PCM_TRIGGER_STOP: + lx_stream->status = LX_STREAM_STATUS_SCHEDULE_STOP; + break; + + default: + err = -EINVAL; + goto exit; + } + tasklet_schedule(&chip->trigger_tasklet); + +exit: + return err; +} + + +static int lx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct lx6464es *chip = snd_pcm_substream_chip(substream); + const int is_capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); + struct lx_stream *stream = is_capture ? &chip->capture_stream : + &chip->playback_stream; + + snd_printdd("->lx_pcm_trigger\n"); + + return lx_pcm_trigger_dispatch(chip, stream, cmd); +} + +static int snd_lx6464es_free(struct lx6464es *chip) +{ + snd_printdd("->snd_lx6464es_free\n"); + + lx_irq_disable(chip); + + if (chip->irq >= 0) + free_irq(chip->irq, chip); + + iounmap(chip->port_dsp_bar); + ioport_unmap(chip->port_plx_remapped); + + pci_release_regions(chip->pci); + pci_disable_device(chip->pci); + + kfree(chip); + + return 0; +} + +static int snd_lx6464es_dev_free(struct snd_device *device) +{ + return snd_lx6464es_free(device->device_data); +} + +/* reset the dsp during initialization */ +static int __devinit lx_init_xilinx_reset(struct lx6464es *chip) +{ + int i; + u32 plx_reg = lx_plx_reg_read(chip, ePLX_CHIPSC); + + snd_printdd("->lx_init_xilinx_reset\n"); + + /* activate reset of xilinx */ + plx_reg &= ~CHIPSC_RESET_XILINX; + + lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg); + msleep(1); + + lx_plx_reg_write(chip, ePLX_MBOX3, 0); + msleep(1); + + plx_reg |= CHIPSC_RESET_XILINX; + lx_plx_reg_write(chip, ePLX_CHIPSC, plx_reg); + + /* deactivate reset of xilinx */ + for (i = 0; i != 100; ++i) { + u32 reg_mbox3; + msleep(10); + reg_mbox3 = lx_plx_reg_read(chip, ePLX_MBOX3); + if (reg_mbox3) { + snd_printd(LXP "xilinx reset done\n"); + snd_printdd(LXP "xilinx took %d loops\n", i); + break; + } + } + + /* todo: add some error handling? */ + + /* clear mr */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + /* le xilinx ES peut ne pas etre encore pret, on attend. */ + msleep(600); + + return 0; +} + +static int __devinit lx_init_xilinx_test(struct lx6464es *chip) +{ + u32 reg; + + snd_printdd("->lx_init_xilinx_test\n"); + + /* TEST if we have access to Xilinx/MicroBlaze */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + reg = lx_dsp_reg_read(chip, eReg_CSM); + + if (reg) { + snd_printk(KERN_ERR LXP "Problem: Reg_CSM %x.\n", reg); + + /* PCI9056_SPACE0_REMAP */ + lx_plx_reg_write(chip, ePLX_PCICR, 1); + + reg = lx_dsp_reg_read(chip, eReg_CSM); + if (reg) { + snd_printk(KERN_ERR LXP "Error: Reg_CSM %x.\n", reg); + return -EAGAIN; /* seems to be appropriate */ + } + } + + snd_printd(LXP "Xilinx/MicroBlaze access test successful\n"); + + return 0; +} + +/* initialize ethersound */ +static int __devinit lx_init_ethersound_config(struct lx6464es *chip) +{ + int i; + u32 orig_conf_es = lx_dsp_reg_read(chip, eReg_CONFES); + + u32 default_conf_es = (64 << IOCR_OUTPUTS_OFFSET) | + (64 << IOCR_INPUTS_OFFSET) | + (FREQ_RATIO_SINGLE_MODE << FREQ_RATIO_OFFSET); + + u32 conf_es = (orig_conf_es & CONFES_READ_PART_MASK) + | (default_conf_es & CONFES_WRITE_PART_MASK); + + snd_printdd("->lx_init_ethersound\n"); + + chip->freq_ratio = FREQ_RATIO_SINGLE_MODE; + + /* + * write it to the card ! + * this actually kicks the ES xilinx, the first time since poweron. + * the MAC address in the Reg_ADMACESMSB Reg_ADMACESLSB registers + * is not ready before this is done, and the bit 2 in Reg_CSES is set. + * */ + lx_dsp_reg_write(chip, eReg_CONFES, conf_es); + + for (i = 0; i != 1000; ++i) { + if (lx_dsp_reg_read(chip, eReg_CSES) & 4) { + snd_printd(LXP "ethersound initialized after %dms\n", + i); + goto ethersound_initialized; + } + msleep(1); + } + snd_printk(KERN_WARNING LXP + "ethersound could not be initialized after %dms\n", i); + return -ETIMEDOUT; + + ethersound_initialized: + snd_printd(LXP "ethersound initialized\n"); + return 0; +} + +static int __devinit lx_init_get_version_features(struct lx6464es *chip) +{ + u32 dsp_version; + + int err; + + snd_printdd("->lx_init_get_version_features\n"); + + err = lx_dsp_get_version(chip, &dsp_version); + + if (err == 0) { + u32 freq; + + snd_printk(LXP "DSP version: V%02d.%02d #%d\n", + (dsp_version>>16) & 0xff, (dsp_version>>8) & 0xff, + dsp_version & 0xff); + + /* later: what firmware version do we expect? */ + + /* retrieve Play/Rec features */ + /* done here because we may have to handle alternate + * DSP files. */ + /* later */ + + /* init the EtherSound sample rate */ + err = lx_dsp_get_clock_frequency(chip, &freq); + if (err == 0) + chip->board_sample_rate = freq; + snd_printd(LXP "actual clock frequency %d\n", freq); + } else { + snd_printk(KERN_ERR LXP "DSP corrupted \n"); + err = -EAGAIN; + } + + return err; +} + +static int lx_set_granularity(struct lx6464es *chip, u32 gran) +{ + int err = 0; + u32 snapped_gran = MICROBLAZE_IBL_MIN; + + snd_printdd("->lx_set_granularity\n"); + + /* blocksize is a power of 2 */ + while ((snapped_gran < gran) && + (snapped_gran < MICROBLAZE_IBL_MAX)) { + snapped_gran *= 2; + } + + if (snapped_gran == chip->pcm_granularity) + return 0; + + err = lx_dsp_set_granularity(chip, snapped_gran); + if (err < 0) { + snd_printk(KERN_WARNING LXP "could not set granularity\n"); + err = -EAGAIN; + } + + if (snapped_gran != gran) + snd_printk(LXP "snapped blocksize to %d\n", snapped_gran); + + snd_printd(LXP "set blocksize on board %d\n", snapped_gran); + chip->pcm_granularity = snapped_gran; + + return err; +} + +/* initialize and test the xilinx dsp chip */ +static int __devinit lx_init_dsp(struct lx6464es *chip) +{ + int err; + u8 mac_address[6]; + int i; + + snd_printdd("->lx_init_dsp\n"); + + snd_printd(LXP "initialize board\n"); + err = lx_init_xilinx_reset(chip); + if (err) + return err; + + snd_printd(LXP "testing board\n"); + err = lx_init_xilinx_test(chip); + if (err) + return err; + + snd_printd(LXP "initialize ethersound configuration\n"); + err = lx_init_ethersound_config(chip); + if (err) + return err; + + lx_irq_enable(chip); + + /** \todo the mac address should be ready by not, but it isn't, + * so we wait for it */ + for (i = 0; i != 1000; ++i) { + err = lx_dsp_get_mac(chip, mac_address); + if (err) + return err; + if (mac_address[0] || mac_address[1] || mac_address[2] || + mac_address[3] || mac_address[4] || mac_address[5]) + goto mac_ready; + msleep(1); + } + return -ETIMEDOUT; + +mac_ready: + snd_printd(LXP "mac address ready read after: %dms\n", i); + snd_printk(LXP "mac address: %02X.%02X.%02X.%02X.%02X.%02X\n", + mac_address[0], mac_address[1], mac_address[2], + mac_address[3], mac_address[4], mac_address[5]); + + err = lx_init_get_version_features(chip); + if (err) + return err; + + lx_set_granularity(chip, MICROBLAZE_IBL_DEFAULT); + + chip->playback_mute = 0; + + return err; +} + +static struct snd_pcm_ops lx_ops_playback = { + .open = lx_pcm_open, + .close = lx_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .prepare = lx_pcm_prepare, + .hw_params = lx_pcm_hw_params_playback, + .hw_free = lx_pcm_hw_free, + .trigger = lx_pcm_trigger, + .pointer = lx_pcm_stream_pointer, +}; + +static struct snd_pcm_ops lx_ops_capture = { + .open = lx_pcm_open, + .close = lx_pcm_close, + .ioctl = snd_pcm_lib_ioctl, + .prepare = lx_pcm_prepare, + .hw_params = lx_pcm_hw_params_capture, + .hw_free = lx_pcm_hw_free, + .trigger = lx_pcm_trigger, + .pointer = lx_pcm_stream_pointer, +}; + +static int __devinit lx_pcm_create(struct lx6464es *chip) +{ + int err; + struct snd_pcm *pcm; + + u32 size = 64 * /* channels */ + 3 * /* 24 bit samples */ + MAX_STREAM_BUFFER * /* periods */ + MICROBLAZE_IBL_MAX * /* frames per period */ + 2; /* duplex */ + + size = PAGE_ALIGN(size); + + /* hardcoded device name & channel count */ + err = snd_pcm_new(chip->card, (char *)card_name, 0, + 1, 1, &pcm); + + pcm->private_data = chip; + + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &lx_ops_playback); + snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &lx_ops_capture); + + pcm->info_flags = 0; + strcpy(pcm->name, card_name); + + err = snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV, + snd_dma_pci_data(chip->pci), + size, size); + if (err < 0) + return err; + + chip->pcm = pcm; + chip->capture_stream.is_capture = 1; + + return 0; +} + +static int lx_control_playback_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 1; + return 0; +} + +static int lx_control_playback_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct lx6464es *chip = snd_kcontrol_chip(kcontrol); + ucontrol->value.integer.value[0] = chip->playback_mute; + return 0; +} + +static int lx_control_playback_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct lx6464es *chip = snd_kcontrol_chip(kcontrol); + int changed = 0; + int current_value = chip->playback_mute; + + if (current_value != ucontrol->value.integer.value[0]) { + lx_level_unmute(chip, 0, !current_value); + chip->playback_mute = !current_value; + changed = 1; + } + return changed; +} + +static struct snd_kcontrol_new lx_control_playback_switch __devinitdata = { + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "PCM Playback Switch", + .index = 0, + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .private_value = 0, + .info = lx_control_playback_info, + .get = lx_control_playback_get, + .put = lx_control_playback_put +}; + + + +static void lx_proc_levels_read(struct snd_info_entry *entry, + struct snd_info_buffer *buffer) +{ + u32 levels[64]; + int err; + int i, j; + struct lx6464es *chip = entry->private_data; + + snd_iprintf(buffer, "capture levels:\n"); + err = lx_level_peaks(chip, 1, 64, levels); + if (err < 0) + return; + + for (i = 0; i != 8; ++i) { + for (j = 0; j != 8; ++j) + snd_iprintf(buffer, "%08x ", levels[i*8+j]); + snd_iprintf(buffer, "\n"); + } + + snd_iprintf(buffer, "\nplayback levels:\n"); + + err = lx_level_peaks(chip, 0, 64, levels); + if (err < 0) + return; + + for (i = 0; i != 8; ++i) { + for (j = 0; j != 8; ++j) + snd_iprintf(buffer, "%08x ", levels[i*8+j]); + snd_iprintf(buffer, "\n"); + } + + snd_iprintf(buffer, "\n"); +} + +static int __devinit lx_proc_create(struct snd_card *card, struct lx6464es *chip) +{ + struct snd_info_entry *entry; + int err = snd_card_proc_new(card, "levels", &entry); + if (err < 0) + return err; + + snd_info_set_text_ops(entry, chip, lx_proc_levels_read); + return 0; +} + + +static int __devinit snd_lx6464es_create(struct snd_card *card, + struct pci_dev *pci, + struct lx6464es **rchip) +{ + struct lx6464es *chip; + int err; + + static struct snd_device_ops ops = { + .dev_free = snd_lx6464es_dev_free, + }; + + snd_printdd("->snd_lx6464es_create\n"); + + *rchip = NULL; + + /* enable PCI device */ + err = pci_enable_device(pci); + if (err < 0) + return err; + + pci_set_master(pci); + + /* check if we can restrict PCI DMA transfers to 32 bits */ + err = pci_set_dma_mask(pci, DMA_32BIT_MASK); + if (err < 0) { + snd_printk(KERN_ERR "architecture does not support " + "32bit PCI busmaster DMA\n"); + pci_disable_device(pci); + return -ENXIO; + } + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) { + err = -ENOMEM; + goto alloc_failed; + } + + chip->card = card; + chip->pci = pci; + chip->irq = -1; + + /* initialize synchronization structs */ + spin_lock_init(&chip->lock); + spin_lock_init(&chip->msg_lock); + mutex_init(&chip->setup_mutex); + tasklet_init(&chip->trigger_tasklet, lx_trigger_tasklet, + (unsigned long)chip); + tasklet_init(&chip->tasklet_capture, lx_tasklet_capture, + (unsigned long)chip); + tasklet_init(&chip->tasklet_playback, lx_tasklet_playback, + (unsigned long)chip); + + /* request resources */ + err = pci_request_regions(pci, card_name); + if (err < 0) + goto request_regions_failed; + + /* plx port */ + chip->port_plx = pci_resource_start(pci, 1); + chip->port_plx_remapped = ioport_map(chip->port_plx, + pci_resource_len(pci, 1)); + + /* dsp port */ + chip->port_dsp_bar = pci_ioremap_bar(pci, 2); + + err = request_irq(pci->irq, lx_interrupt, IRQF_SHARED, + card_name, chip); + if (err) { + snd_printk(KERN_ERR LXP "unable to grab IRQ %d\n", pci->irq); + goto request_irq_failed; + } + chip->irq = pci->irq; + + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); + if (err < 0) + goto device_new_failed; + + err = lx_init_dsp(chip); + if (err < 0) { + snd_printk(KERN_ERR LXP "error during DSP initialization\n"); + return err; + } + + err = lx_pcm_create(chip); + if (err < 0) + return err; + + err = lx_proc_create(card, chip); + if (err < 0) + return err; + + err = snd_ctl_add(card, snd_ctl_new1(&lx_control_playback_switch, + chip)); + if (err < 0) + return err; + + snd_card_set_dev(card, &pci->dev); + + *rchip = chip; + return 0; + +device_new_failed: + free_irq(pci->irq, chip); + +request_irq_failed: + pci_release_regions(pci); + +request_regions_failed: + kfree(chip); + +alloc_failed: + pci_disable_device(pci); + + return err; +} + +static int __devinit snd_lx6464es_probe(struct pci_dev *pci, + const struct pci_device_id *pci_id) +{ + static int dev; + struct snd_card *card; + struct lx6464es *chip; + int err; + + snd_printdd("->snd_lx6464es_probe\n"); + + if (dev >= SNDRV_CARDS) + return -ENODEV; + if (!enable[dev]) { + dev++; + return -ENOENT; + } + + card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); + if (card == NULL) + return -ENOMEM; + + err = snd_lx6464es_create(card, pci, &chip); + if (err < 0) { + snd_printk(KERN_ERR LXP "error during snd_lx6464es_create\n"); + goto out_free; + } + + strcpy(card->driver, "lx6464es"); + strcpy(card->shortname, "Digigram LX6464ES"); + sprintf(card->longname, "%s at 0x%lx, 0x%p, irq %i", + card->shortname, chip->port_plx, + chip->port_dsp_bar, chip->irq); + + err = snd_card_register(card); + if (err < 0) + goto out_free; + + snd_printdd(LXP "initialization successful\n"); + pci_set_drvdata(pci, card); + dev++; + return 0; + +out_free: + snd_card_free(card); + return err; + +} + +static void __devexit snd_lx6464es_remove(struct pci_dev *pci) +{ + snd_card_free(pci_get_drvdata(pci)); + pci_set_drvdata(pci, NULL); +} + + +static struct pci_driver driver = { + .name = "Digigram LX6464ES", + .id_table = snd_lx6464es_ids, + .probe = snd_lx6464es_probe, + .remove = __devexit_p(snd_lx6464es_remove), +}; + + +/* module initialization */ +static int __init mod_init(void) +{ + return pci_register_driver(&driver); +} + +static void __exit mod_exit(void) +{ + pci_unregister_driver(&driver); +} + +module_init(mod_init); +module_exit(mod_exit); diff --git a/sound/pci/lx6464es/lx6464es.h b/sound/pci/lx6464es/lx6464es.h new file mode 100644 index 0000000..012c010 --- /dev/null +++ b/sound/pci/lx6464es/lx6464es.h @@ -0,0 +1,114 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * + * Copyright (c) 2009 Tim Blechmann + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#ifndef LX6464ES_H +#define LX6464ES_H + +#include +#include + +#include +#include + +#include "lx_core.h" + +#define LXP "LX6464ES: " + +enum { + ES_cmd_free = 0, /* no command executing */ + ES_cmd_processing = 1, /* execution of a read/write command */ + ES_read_pending = 2, /* a asynchron read command is pending */ + ES_read_finishing = 3, /* a read command has finished waiting (set by + * Interrupt or CancelIrp) */ +}; + +enum lx_stream_status { + LX_STREAM_STATUS_FREE, +/* LX_STREAM_STATUS_OPEN, */ + LX_STREAM_STATUS_SCHEDULE_RUN, +/* LX_STREAM_STATUS_STARTED, */ + LX_STREAM_STATUS_RUNNING, + LX_STREAM_STATUS_SCHEDULE_STOP, +/* LX_STREAM_STATUS_STOPPED, */ +/* LX_STREAM_STATUS_PAUSED */ +}; + + +struct lx_stream { + struct snd_pcm_substream *stream; + snd_pcm_uframes_t frame_pos; + enum lx_stream_status status; /* free, open, running, draining + * pause */ + int is_capture:1; +}; + + +struct lx6464es { + struct snd_card *card; + struct pci_dev *pci; + int irq; + + spinlock_t lock; /* interrupt spinlock */ + struct mutex setup_mutex; /* mutex used in hw_params, open + * and close */ + + struct tasklet_struct trigger_tasklet; /* trigger tasklet */ + struct tasklet_struct tasklet_capture; + struct tasklet_struct tasklet_playback; + + /* ports */ + unsigned long port_plx; /* io port (size=256) */ + void __iomem *port_plx_remapped; /* remapped plx port */ + void __iomem *port_dsp_bar; /* memory port (32-bit, + * non-prefetchable, + * size=8K) */ + + /* messaging */ + spinlock_t msg_lock; /* message spinlock */ + atomic_t send_message_locked; + struct lx_rmh rmh; + + /* configuration */ + uint freq_ratio : 2; + uint playback_mute : 1; + uint hardware_running[2]; + u32 board_sample_rate; /* sample rate read from + * board */ + u32 sample_rate; /* our sample rate */ + u16 pcm_granularity; /* board blocksize */ + + /* dma */ + struct snd_dma_buffer capture_dma_buf; + struct snd_dma_buffer playback_dma_buf; + + /* pcm */ + struct snd_pcm *pcm; + + /* streams */ + struct lx_stream capture_stream; + struct lx_stream playback_stream; +}; + + +#endif /* LX6464ES_H */ diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c new file mode 100644 index 0000000..a9f8f88 --- /dev/null +++ b/sound/pci/lx6464es/lx_core.c @@ -0,0 +1,1442 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * low-level interface + * + * Copyright (c) 2009 Tim Blechmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* #define RMH_DEBUG 1 */ + +#include +#include +#include + +#include "lx6464es.h" +#include "lx_core.h" + +/* low-level register access */ + +static const unsigned long dsp_port_offsets[] = { + 0, + 0x400, + 0x401, + 0x402, + 0x403, + 0x404, + 0x405, + 0x406, + 0x407, + 0x408, + 0x409, + 0x40a, + 0x40b, + 0x40c, + + 0x410, + 0x411, + 0x412, + 0x413, + 0x414, + 0x415, + 0x416, + + 0x420, + 0x430, + 0x431, + 0x432, + 0x433, + 0x434, + 0x440 +}; + +static void __iomem *lx_dsp_register(struct lx6464es *chip, int port) +{ + void __iomem *base_address = chip->port_dsp_bar; + return base_address + dsp_port_offsets[port]*4; +} + +unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port) +{ + void __iomem *address = lx_dsp_register(chip, port); + return ioread32(address); +} + +void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len) +{ + void __iomem *address = lx_dsp_register(chip, port); + memcpy_fromio(data, address, len*sizeof(u32)); +} + + +void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data) +{ + void __iomem *address = lx_dsp_register(chip, port); + iowrite32(data, address); +} + +void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data, + u32 len) +{ + void __iomem *address = lx_dsp_register(chip, port); + memcpy_toio(address, data, len*sizeof(u32)); +} + + +static const unsigned long plx_port_offsets[] = { + 0x04, + 0x40, + 0x44, + 0x48, + 0x4c, + 0x50, + 0x54, + 0x58, + 0x5c, + 0x64, + 0x68, + 0x6C +}; + +static void __iomem *lx_plx_register(struct lx6464es *chip, int port) +{ + void __iomem *base_address = chip->port_plx_remapped; + return base_address + plx_port_offsets[port]; +} + +unsigned long lx_plx_reg_read(struct lx6464es *chip, int port) +{ + void __iomem *address = lx_plx_register(chip, port); + return ioread32(address); +} + +void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data) +{ + void __iomem *address = lx_plx_register(chip, port); + iowrite32(data, address); +} + +u32 lx_plx_mbox_read(struct lx6464es *chip, int mbox_nr) +{ + int index; + + switch (mbox_nr) { + case 1: + index = ePLX_MBOX1; break; + case 2: + index = ePLX_MBOX2; break; + case 3: + index = ePLX_MBOX3; break; + case 4: + index = ePLX_MBOX4; break; + case 5: + index = ePLX_MBOX5; break; + case 6: + index = ePLX_MBOX6; break; + case 7: + index = ePLX_MBOX7; break; + case 0: /* reserved for HF flags */ + snd_BUG(); + default: + return 0xdeadbeef; + } + + return lx_plx_reg_read(chip, index); +} + +int lx_plx_mbox_write(struct lx6464es *chip, int mbox_nr, u32 value) +{ + int index = -1; + + switch (mbox_nr) { + case 1: + index = ePLX_MBOX1; break; + case 3: + index = ePLX_MBOX3; break; + case 4: + index = ePLX_MBOX4; break; + case 5: + index = ePLX_MBOX5; break; + case 6: + index = ePLX_MBOX6; break; + case 7: + index = ePLX_MBOX7; break; + case 0: /* reserved for HF flags */ + case 2: /* reserved for Pipe States + * the DSP keeps an image of it */ + snd_BUG(); + return -EBADRQC; + } + + lx_plx_reg_write(chip, index, value); + return 0; +} + + +/* rmh */ + +#ifdef CONFIG_SND_DEBUG +#define CMD_NAME(a) a +#else +#define CMD_NAME(a) NULL +#endif + +#define Reg_CSM_MR 0x00000002 +#define Reg_CSM_MC 0x00000001 + +struct dsp_cmd_info { + u32 dcCodeOp; /* Op Code of the command (usually 1st 24-bits + * word).*/ + u16 dcCmdLength; /* Command length in words of 24 bits.*/ + u16 dcStatusType; /* Status type: 0 for fixed length, 1 for + * random. */ + u16 dcStatusLength; /* Status length (if fixed).*/ + char *dcOpName; +}; + +/* + Initialization and control data for the Microblaze interface + - OpCode: + the opcode field of the command set at the proper offset + - CmdLength + the number of command words + - StatusType + offset in the status registers: 0 means that the return value may be + different from 0, and must be read + - StatusLength + the number of status words (in addition to the return value) +*/ + +static struct dsp_cmd_info dsp_commands[] = +{ + { (CMD_00_INFO_DEBUG << OPCODE_OFFSET) , 1 /*custom*/ + , 1 , 0 /**/ , CMD_NAME("INFO_DEBUG") }, + { (CMD_01_GET_SYS_CFG << OPCODE_OFFSET) , 1 /**/ + , 1 , 2 /**/ , CMD_NAME("GET_SYS_CFG") }, + { (CMD_02_SET_GRANULARITY << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_GRANULARITY") }, + { (CMD_03_SET_TIMER_IRQ << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_TIMER_IRQ") }, + { (CMD_04_GET_EVENT << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /*up to 10*/ , CMD_NAME("GET_EVENT") }, + { (CMD_05_GET_PIPES << OPCODE_OFFSET) , 1 /**/ + , 1 , 2 /*up to 4*/ , CMD_NAME("GET_PIPES") }, + { (CMD_06_ALLOCATE_PIPE << OPCODE_OFFSET) , 1 /**/ + , 0 , 0 /**/ , CMD_NAME("ALLOCATE_PIPE") }, + { (CMD_07_RELEASE_PIPE << OPCODE_OFFSET) , 1 /**/ + , 0 , 0 /**/ , CMD_NAME("RELEASE_PIPE") }, + { (CMD_08_ASK_BUFFERS << OPCODE_OFFSET) , 1 /**/ + , 1 , MAX_STREAM_BUFFER , CMD_NAME("ASK_BUFFERS") }, + { (CMD_09_STOP_PIPE << OPCODE_OFFSET) , 1 /**/ + , 0 , 0 /*up to 2*/ , CMD_NAME("STOP_PIPE") }, + { (CMD_0A_GET_PIPE_SPL_COUNT << OPCODE_OFFSET) , 1 /**/ + , 1 , 1 /*up to 2*/ , CMD_NAME("GET_PIPE_SPL_COUNT") }, + { (CMD_0B_TOGGLE_PIPE_STATE << OPCODE_OFFSET) , 1 /*up to 5*/ + , 1 , 0 /**/ , CMD_NAME("TOGGLE_PIPE_STATE") }, + { (CMD_0C_DEF_STREAM << OPCODE_OFFSET) , 1 /*up to 4*/ + , 1 , 0 /**/ , CMD_NAME("DEF_STREAM") }, + { (CMD_0D_SET_MUTE << OPCODE_OFFSET) , 3 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_MUTE") }, + { (CMD_0E_GET_STREAM_SPL_COUNT << OPCODE_OFFSET) , 1/**/ + , 1 , 2 /**/ , CMD_NAME("GET_STREAM_SPL_COUNT") }, + { (CMD_0F_UPDATE_BUFFER << OPCODE_OFFSET) , 3 /*up to 4*/ + , 0 , 1 /**/ , CMD_NAME("UPDATE_BUFFER") }, + { (CMD_10_GET_BUFFER << OPCODE_OFFSET) , 1 /**/ + , 1 , 4 /**/ , CMD_NAME("GET_BUFFER") }, + { (CMD_11_CANCEL_BUFFER << OPCODE_OFFSET) , 1 /**/ + , 1 , 1 /*up to 4*/ , CMD_NAME("CANCEL_BUFFER") }, + { (CMD_12_GET_PEAK << OPCODE_OFFSET) , 1 /**/ + , 1 , 1 /**/ , CMD_NAME("GET_PEAK") }, + { (CMD_13_SET_STREAM_STATE << OPCODE_OFFSET) , 1 /**/ + , 1 , 0 /**/ , CMD_NAME("SET_STREAM_STATE") }, +}; + +static void lx_message_init(struct lx_rmh *rmh, enum cmd_mb_opcodes cmd) +{ + snd_BUG_ON(cmd >= CMD_14_INVALID); + + rmh->cmd[0] = dsp_commands[cmd].dcCodeOp; + rmh->cmd_len = dsp_commands[cmd].dcCmdLength; + rmh->stat_len = dsp_commands[cmd].dcStatusLength; + rmh->dsp_stat = dsp_commands[cmd].dcStatusType; + rmh->cmd_idx = cmd; + memset(&rmh->cmd[1], 0, (REG_CRM_NUMBER - 1) * sizeof(u32)); + +#ifdef CONFIG_SND_DEBUG + memset(rmh->stat, 0, REG_CRM_NUMBER * sizeof(u32)); +#endif +#ifdef RMH_DEBUG + rmh->cmd_idx = cmd; +#endif +} + +#ifdef RMH_DEBUG +#define LXRMH "lx6464es rmh: " +static void lx_message_dump(struct lx_rmh *rmh) +{ + u8 idx = rmh->cmd_idx; + int i; + + snd_printk(LXRMH "command %s\n", dsp_commands[idx].dcOpName); + + for (i = 0; i != rmh->cmd_len; ++i) + snd_printk(LXRMH "\tcmd[%d] %08x\n", i, rmh->cmd[i]); + + for (i = 0; i != rmh->stat_len; ++i) + snd_printk(LXRMH "\tstat[%d]: %08x\n", i, rmh->stat[i]); + snd_printk("\n"); +} +#else +static inline void lx_message_dump(struct lx_rmh *rmh) +{} +#endif + + + +/* sleep 500 - 100 = 400 times 100us -> the timeout is >= 40 ms */ +#define XILINX_TIMEOUT_MS 40 +#define XILINX_POLL_NO_SLEEP 100 +#define XILINX_POLL_ITERATIONS 150 + +static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh) +{ + u32 reg = ED_DSP_TIMED_OUT; + int dwloop; + int answer_received; + + if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) { + snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg); + return -EBUSY; + } + + /* write command */ + lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len); + + snd_BUG_ON(atomic_read(&chip->send_message_locked) != 0); + atomic_set(&chip->send_message_locked, 1); + + /* MicoBlaze gogogo */ + lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC); + + /* wait for interrupt to answer */ + for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS; ++dwloop) { + answer_received = atomic_read(&chip->send_message_locked); + if (answer_received == 0) + break; + msleep(1); + } + + if (answer_received == 0) { + /* in Debug mode verify Reg_CSM_MR */ + snd_BUG_ON(!(lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)); + + /* command finished, read status */ + if (rmh->dsp_stat == 0) + reg = lx_dsp_reg_read(chip, eReg_CRM1); + else + reg = 0; + } else { + int i; + snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! " + "Interrupts disabled?\n"); + + /* attente bit Reg_CSM_MR */ + for (i = 0; i != XILINX_POLL_ITERATIONS; i++) { + if ((lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR)) { + if (rmh->dsp_stat == 0) + reg = lx_dsp_reg_read(chip, eReg_CRM1); + else + reg = 0; + goto polling_successful; + } + + if (i > XILINX_POLL_NO_SLEEP) + msleep(1); + } + snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send! " + "polling failed\n"); + +polling_successful: + atomic_set(&chip->send_message_locked, 0); + } + + if ((reg & ERROR_VALUE) == 0) { + /* read response */ + if (rmh->stat_len) { + snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1)); + + lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat, + rmh->stat_len); + } + } else + snd_printk(KERN_WARNING LXP "lx_message_send: error_value %x\n", + reg); + + /* clear Reg_CSM_MR */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + switch (reg) { + case ED_DSP_TIMED_OUT: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n"); + return -ETIMEDOUT; + + case ED_DSP_CRASHED: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n"); + return -EAGAIN; + } + + lx_message_dump(rmh); + return 0; +} + +static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh) +{ + u32 reg = ED_DSP_TIMED_OUT; + int dwloop; + + if (lx_dsp_reg_read(chip, eReg_CSM) & (Reg_CSM_MC | Reg_CSM_MR)) { + snd_printk(KERN_ERR LXP "PIOSendMessage eReg_CSM %x\n", reg); + return -EBUSY; + } + + /* write command */ + lx_dsp_reg_writebuf(chip, eReg_CRM1, rmh->cmd, rmh->cmd_len); + + /* MicoBlaze gogogo */ + lx_dsp_reg_write(chip, eReg_CSM, Reg_CSM_MC); + + /* wait for interrupt to answer */ + for (dwloop = 0; dwloop != XILINX_TIMEOUT_MS * 1000; ++dwloop) { + if (lx_dsp_reg_read(chip, eReg_CSM) & Reg_CSM_MR) { + if (rmh->dsp_stat == 0) + reg = lx_dsp_reg_read(chip, eReg_CRM1); + else + reg = 0; + goto polling_successful; + } else + udelay(1); + } + snd_printk(KERN_WARNING LXP "TIMEOUT lx_message_send_atomic! " + "polling failed\n"); + +polling_successful: + if ((reg & ERROR_VALUE) == 0) { + /* read response */ + if (rmh->stat_len) { + snd_BUG_ON(rmh->stat_len >= (REG_CRM_NUMBER-1)); + lx_dsp_reg_readbuf(chip, eReg_CRM2, rmh->stat, + rmh->stat_len); + } + } else + snd_printk(LXP "rmh error: %08x\n", reg); + + /* clear Reg_CSM_MR */ + lx_dsp_reg_write(chip, eReg_CSM, 0); + + switch (reg) { + case ED_DSP_TIMED_OUT: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp timeout\n"); + return -ETIMEDOUT; + + case ED_DSP_CRASHED: + snd_printk(KERN_WARNING LXP "lx_message_send: dsp crashed\n"); + return -EAGAIN; + } + + lx_message_dump(rmh); + + return reg; +} + + +/* low-level dsp access */ +int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version) +{ + u16 ret; + unsigned long flags; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG); + ret = lx_message_send_atomic(chip, &chip->rmh); + + *rdsp_version = chip->rmh.stat[1]; + spin_unlock_irqrestore(&chip->msg_lock, flags); + return ret; +} + +int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq) +{ + u16 ret = 0; + unsigned long flags; + u32 freq_raw = 0; + u32 freq = 0; + u32 frequency = 0; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_01_GET_SYS_CFG); + ret = lx_message_send_atomic(chip, &chip->rmh); + + if (ret == 0) { + freq_raw = chip->rmh.stat[0] >> FREQ_FIELD_OFFSET; + freq = freq_raw & XES_FREQ_COUNT8_MASK; + + if ((freq < XES_FREQ_COUNT8_48_MAX) || + (freq > XES_FREQ_COUNT8_44_MIN)) + frequency = 0; /* unknown */ + else if (freq >= XES_FREQ_COUNT8_44_MAX) + frequency = 44100; + else + frequency = 48000; + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + + *rfreq = frequency * chip->freq_ratio; + + return ret; +} + +int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address) +{ + u32 macmsb, maclsb; + + macmsb = lx_dsp_reg_read(chip, eReg_ADMACESMSB) & 0x00FFFFFF; + maclsb = lx_dsp_reg_read(chip, eReg_ADMACESLSB) & 0x00FFFFFF; + + /* todo: endianess handling */ + mac_address[5] = ((u8 *)(&maclsb))[0]; + mac_address[4] = ((u8 *)(&maclsb))[1]; + mac_address[3] = ((u8 *)(&maclsb))[2]; + mac_address[2] = ((u8 *)(&macmsb))[0]; + mac_address[1] = ((u8 *)(&macmsb))[1]; + mac_address[0] = ((u8 *)(&macmsb))[2]; + + return 0; +} + + +int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_02_SET_GRANULARITY); + chip->rmh.cmd[0] |= gran; + + ret = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + return ret; +} + +int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&chip->msg_lock, flags); + + lx_message_init(&chip->rmh, CMD_04_GET_EVENT); + chip->rmh.stat_len = 9; /* we don't necessarily need the full length */ + + ret = lx_message_send_atomic(chip, &chip->rmh); + + if (!ret) + memcpy(data, chip->rmh.stat, chip->rmh.stat_len * sizeof(u32)); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return ret; +} + +#define CSES_TIMEOUT 100 /* microseconds */ +#define CSES_CE 0x0001 +#define CSES_BROADCAST 0x0002 +#define CSES_UPDATE_LDSV 0x0004 + +int lx_dsp_es_check_pipeline(struct lx6464es *chip) +{ + int i; + + for (i = 0; i != CSES_TIMEOUT; ++i) { + /* + * le bit CSES_UPDATE_LDSV est à 1 dés que le macprog + * est pret. il re-passe à 0 lorsque le premier read a + * été fait. pour l'instant on retire le test car ce bit + * passe a 1 environ 200 à 400 ms aprés que le registre + * confES à été écrit (kick du xilinx ES). + * + * On ne teste que le bit CE. + * */ + + u32 cses = lx_dsp_reg_read(chip, eReg_CSES); + + if ((cses & CSES_CE) == 0) + return 0; + + udelay(1); + } + + return -ETIMEDOUT; +} + + +#define PIPE_INFO_TO_CMD(capture, pipe) \ + ((u32)((u32)(pipe) | ((capture) ? ID_IS_CAPTURE : 0L)) << ID_OFFSET) + + + +/* low-level pipe handling */ +int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture, + int channels) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_06_ALLOCATE_PIPE); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= channels; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + if (err != 0) + snd_printk(KERN_ERR "lx6464es: could not allocate pipe\n"); + + return err; +} + +int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_07_RELEASE_PIPE); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + return err; +} + +int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_needed, u32 *r_freed, u32 *size_array) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + +#ifdef CONFIG_SND_DEBUG + if (size_array) + memset(size_array, 0, sizeof(u32)*MAX_STREAM_BUFFER); +#endif + + *r_needed = 0; + *r_freed = 0; + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_08_ASK_BUFFERS); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (!err) { + int i; + for (i = 0; i < MAX_STREAM_BUFFER; ++i) { + u32 stat = chip->rmh.stat[i]; + if (stat & (BF_EOB << BUFF_FLAGS_OFFSET)) { + /* finished */ + *r_freed += 1; + if (size_array) + size_array[i] = stat & MASK_DATA_SIZE; + } else if ((stat & (BF_VALID << BUFF_FLAGS_OFFSET)) + == 0) + /* free */ + *r_needed += 1; + } + +#if 0 + snd_printdd(LXP "CMD_08_ASK_BUFFERS: needed %d, freed %d\n", + *r_needed, *r_freed); + for (i = 0; i < MAX_STREAM_BUFFER; ++i) { + for (i = 0; i != chip->rmh.stat_len; ++i) + snd_printdd(" stat[%d]: %x, %x\n", i, + chip->rmh.stat[i], + chip->rmh.stat[i] & MASK_DATA_SIZE); + } +#endif + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + + +int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_09_STOP_PIPE); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +static int lx_pipe_toggle_state(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0B_TOGGLE_PIPE_STATE); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + + +int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err; + + err = lx_pipe_wait_for_idle(chip, pipe, is_capture); + if (err < 0) + return err; + + err = lx_pipe_toggle_state(chip, pipe, is_capture); + + return err; +} + +int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture) +{ + int err = 0; + + err = lx_pipe_wait_for_start(chip, pipe, is_capture); + if (err < 0) + return err; + + err = lx_pipe_toggle_state(chip, pipe, is_capture); + + return err; +} + + +int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *rsample_count) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.stat_len = 2; /* need all words here! */ + + err = lx_message_send_atomic(chip, &chip->rmh); /* don't sleep! */ + + if (err != 0) + snd_printk(KERN_ERR + "lx6464es: could not query pipe's sample count\n"); + else { + *rsample_count = ((u64)(chip->rmh.stat[0] & MASK_SPL_COUNT_HI) + << 24) /* hi part */ + + chip->rmh.stat[1]; /* lo part */ + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0A_GET_PIPE_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err != 0) + snd_printk(KERN_ERR "lx6464es: could not query pipe's state\n"); + else + *rstate = (chip->rmh.stat[0] >> PSTATE_OFFSET) & 0x0F; + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +static int lx_pipe_wait_for_state(struct lx6464es *chip, u32 pipe, + int is_capture, u16 state) +{ + int i; + + /* max 2*PCMOnlyGranularity = 2*1024 at 44100 = < 50 ms: + * timeout 50 ms */ + for (i = 0; i != 50; ++i) { + u16 current_state; + int err = lx_pipe_state(chip, pipe, is_capture, ¤t_state); + + if (err < 0) + return err; + + if (current_state == state) + return 0; + + mdelay(1); + } + + return -ETIMEDOUT; +} + +int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture) +{ + return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_RUN); +} + +int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture) +{ + return lx_pipe_wait_for_state(chip, pipe, is_capture, PSTATE_IDLE); +} + +/* low-level stream handling */ +int lx_stream_set_state(struct lx6464es *chip, u32 pipe, + int is_capture, enum stream_state_t state) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_13_SET_STREAM_STATE); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= state; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + return err; +} + +int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime, + u32 pipe, int is_capture) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + u32 channels = runtime->channels; + + if (runtime->channels != channels) + snd_printk(KERN_ERR LXP "channel count mismatch: %d vs %d", + runtime->channels, channels); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0C_DEF_STREAM); + + chip->rmh.cmd[0] |= pipe_cmd; + + if (runtime->sample_bits == 16) + /* 16 bit format */ + chip->rmh.cmd[0] |= (STREAM_FMT_16b << STREAM_FMT_OFFSET); + + if (snd_pcm_format_little_endian(runtime->format)) + /* little endian/intel format */ + chip->rmh.cmd[0] |= (STREAM_FMT_intel << STREAM_FMT_OFFSET); + + chip->rmh.cmd[0] |= channels-1; + + err = lx_message_send_atomic(chip, &chip->rmh); + spin_unlock_irqrestore(&chip->msg_lock, flags); + + return err; +} + +int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture, + int *rstate) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + *rstate = (chip->rmh.stat[0] & SF_START) ? START_STATE : PAUSE_STATE; + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *r_bytepos) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0E_GET_STREAM_SPL_COUNT); + + chip->rmh.cmd[0] |= pipe_cmd; + + err = lx_message_send_atomic(chip, &chip->rmh); + + *r_bytepos = ((u64) (chip->rmh.stat[0] & MASK_SPL_COUNT_HI) + << 32) /* hi part */ + + chip->rmh.stat[1]; /* lo part */ + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +/* low-level buffer handling */ +int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi, + u32 *r_buffer_index) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0F_UPDATE_BUFFER); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= BF_NOTIFY_EOB; /* request interrupt notification */ + + /* todo: pause request, circular buffer */ + + chip->rmh.cmd[1] = buffer_size & MASK_DATA_SIZE; + chip->rmh.cmd[2] = buf_address_lo; + + if (buf_address_hi) { + chip->rmh.cmd_len = 4; + chip->rmh.cmd[3] = buf_address_hi; + chip->rmh.cmd[0] |= BF_64BITS_ADR; + } + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err == 0) { + *r_buffer_index = chip->rmh.stat[0]; + goto done; + } + + if (err == EB_RBUFFERS_TABLE_OVERFLOW) + snd_printk(LXP "lx_buffer_give EB_RBUFFERS_TABLE_OVERFLOW\n"); + + if (err == EB_INVALID_STREAM) + snd_printk(LXP "lx_buffer_give EB_INVALID_STREAM\n"); + + if (err == EB_CMD_REFUSED) + snd_printk(LXP "lx_buffer_give EB_CMD_REFUSED\n"); + + done: + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_buffer_size) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= MASK_BUFFER_ID; /* ask for the current buffer: the + * microblaze will seek for it */ + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err == 0) + *r_buffer_size = chip->rmh.stat[0] & MASK_DATA_SIZE; + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_index) +{ + int err; + unsigned long flags; + + u32 pipe_cmd = PIPE_INFO_TO_CMD(is_capture, pipe); + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_11_CANCEL_BUFFER); + + chip->rmh.cmd[0] |= pipe_cmd; + chip->rmh.cmd[0] |= buffer_index; + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + + +/* low-level gain/peak handling + * + * \todo: can we unmute capture/playback channels independently? + * + * */ +int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute) +{ + int err; + unsigned long flags; + + /* bit set to 1: channel muted */ + u64 mute_mask = unmute ? 0 : 0xFFFFFFFFFFFFFFFFLLU; + + spin_lock_irqsave(&chip->msg_lock, flags); + lx_message_init(&chip->rmh, CMD_0D_SET_MUTE); + + chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, 0); + + chip->rmh.cmd[1] = (u32)(mute_mask >> (u64)32); /* hi part */ + chip->rmh.cmd[2] = (u32)(mute_mask & (u64)0xFFFFFFFF); /* lo part */ + + snd_printk("mute %x %x %x\n", chip->rmh.cmd[0], chip->rmh.cmd[1], + chip->rmh.cmd[2]); + + err = lx_message_send_atomic(chip, &chip->rmh); + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +static u32 peak_map[] = { + 0x00000109, /* -90.308dB */ + 0x0000083B, /* -72.247dB */ + 0x000020C4, /* -60.205dB */ + 0x00008273, /* -48.030dB */ + 0x00020756, /* -36.005dB */ + 0x00040C37, /* -30.001dB */ + 0x00081385, /* -24.002dB */ + 0x00101D3F, /* -18.000dB */ + 0x0016C310, /* -15.000dB */ + 0x002026F2, /* -12.001dB */ + 0x002D6A86, /* -9.000dB */ + 0x004026E6, /* -6.004dB */ + 0x005A9DF6, /* -3.000dB */ + 0x0065AC8B, /* -2.000dB */ + 0x00721481, /* -1.000dB */ + 0x007FFFFF, /* FS */ +}; + +int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels, + u32 *r_levels) +{ + int err = 0; + unsigned long flags; + int i; + spin_lock_irqsave(&chip->msg_lock, flags); + + for (i = 0; i < channels; i += 4) { + u32 s0, s1, s2, s3; + + lx_message_init(&chip->rmh, CMD_12_GET_PEAK); + chip->rmh.cmd[0] |= PIPE_INFO_TO_CMD(is_capture, i); + + err = lx_message_send_atomic(chip, &chip->rmh); + + if (err == 0) { + s0 = peak_map[chip->rmh.stat[0] & 0x0F]; + s1 = peak_map[(chip->rmh.stat[0] >> 4) & 0xf]; + s2 = peak_map[(chip->rmh.stat[0] >> 8) & 0xf]; + s3 = peak_map[(chip->rmh.stat[0] >> 12) & 0xf]; + } else + s0 = s1 = s2 = s3 = 0; + + r_levels[0] = s0; + r_levels[1] = s1; + r_levels[2] = s2; + r_levels[3] = s3; + + r_levels += 4; + } + + spin_unlock_irqrestore(&chip->msg_lock, flags); + return err; +} + +/* interrupt handling */ +#define PCX_IRQ_NONE 0 +#define IRQCS_ACTIVE_PCIDB 0x00002000L /* Bit nø 13 */ +#define IRQCS_ENABLE_PCIIRQ 0x00000100L /* Bit nø 08 */ +#define IRQCS_ENABLE_PCIDB 0x00000200L /* Bit nø 09 */ + +static u32 lx_interrupt_test_ack(struct lx6464es *chip) +{ + u32 irqcs = lx_plx_reg_read(chip, ePLX_IRQCS); + + /* Test if PCI Doorbell interrupt is active */ + if (irqcs & IRQCS_ACTIVE_PCIDB) { + u32 temp; + irqcs = PCX_IRQ_NONE; + + while ((temp = lx_plx_reg_read(chip, ePLX_L2PCIDB))) { + /* RAZ interrupt */ + irqcs |= temp; + lx_plx_reg_write(chip, ePLX_L2PCIDB, temp); + } + + return irqcs; + } + return PCX_IRQ_NONE; +} + +static int lx_interrupt_ack(struct lx6464es *chip, u32 *r_irqsrc, + int *r_async_pending, int *r_async_escmd) +{ + u32 irq_async; + u32 irqsrc = lx_interrupt_test_ack(chip); + + if (irqsrc == PCX_IRQ_NONE) + return 0; + + *r_irqsrc = irqsrc; + + irq_async = irqsrc & MASK_SYS_ASYNC_EVENTS; /* + EtherSound response + * (set by xilinx) + EOB */ + + if (irq_async & MASK_SYS_STATUS_ESA) { + irq_async &= ~MASK_SYS_STATUS_ESA; + *r_async_escmd = 1; + } + + if (irqsrc & MASK_SYS_STATUS_CMD_DONE) + /* xilinx command notification */ + atomic_set(&chip->send_message_locked, 0); + + if (irq_async) { + /* snd_printd("interrupt: async event pending\n"); */ + *r_async_pending = 1; + } + + return 1; +} + +static int lx_interrupt_handle_async_events(struct lx6464es *chip, u32 irqsrc, + int *r_freq_changed, + u64 *r_notified_in_pipe_mask, + u64 *r_notified_out_pipe_mask) +{ + int err; + u32 stat[9]; /* answer from CMD_04_GET_EVENT */ + + /* On peut optimiser pour ne pas lire les evenements vides + * les mots de réponse sont dans l'ordre suivant : + * Stat[0] mot de status général + * Stat[1] fin de buffer OUT pF + * Stat[2] fin de buffer OUT pf + * Stat[3] fin de buffer IN pF + * Stat[4] fin de buffer IN pf + * Stat[5] underrun poid fort + * Stat[6] underrun poid faible + * Stat[7] overrun poid fort + * Stat[8] overrun poid faible + * */ + + u64 orun_mask; + u64 urun_mask; +#if 0 + int has_underrun = (irqsrc & MASK_SYS_STATUS_URUN) ? 1 : 0; + int has_overrun = (irqsrc & MASK_SYS_STATUS_ORUN) ? 1 : 0; +#endif + int eb_pending_out = (irqsrc & MASK_SYS_STATUS_EOBO) ? 1 : 0; + int eb_pending_in = (irqsrc & MASK_SYS_STATUS_EOBI) ? 1 : 0; + + *r_freq_changed = (irqsrc & MASK_SYS_STATUS_FREQ) ? 1 : 0; + + err = lx_dsp_read_async_events(chip, stat); + if (err < 0) + return err; + + if (eb_pending_in) { + *r_notified_in_pipe_mask = ((u64)stat[3] << 32) + + stat[4]; + snd_printdd(LXP "interrupt: EOBI pending %llx\n", + *r_notified_in_pipe_mask); + } + if (eb_pending_out) { + *r_notified_out_pipe_mask = ((u64)stat[1] << 32) + + stat[2]; + snd_printdd(LXP "interrupt: EOBO pending %llx\n", + *r_notified_out_pipe_mask); + } + + orun_mask = ((u64)stat[7] << 32) + stat[8]; + urun_mask = ((u64)stat[5] << 32) + stat[6]; + + /* todo: handle xrun notification */ + + return err; +} + +static int lx_interrupt_request_new_buffer(struct lx6464es *chip, + struct lx_stream *lx_stream) +{ + struct snd_pcm_substream *substream = lx_stream->stream; + int is_capture = lx_stream->is_capture; + int err; + unsigned long flags; + + const u32 channels = substream->runtime->channels; + const u32 bytes_per_frame = channels * 3; + const u32 period_size = substream->runtime->period_size; + const u32 period_bytes = period_size * bytes_per_frame; + const u32 pos = lx_stream->frame_pos; + const u32 next_pos = ((pos+1) == substream->runtime->periods) ? + 0 : pos + 1; + + dma_addr_t buf = substream->dma_buffer.addr + pos * period_bytes; + u32 buf_hi = 0; + u32 buf_lo = 0; + u32 buffer_index = 0; + + u32 needed, freed; + u32 size_array[MAX_STREAM_BUFFER]; + + snd_printdd("->lx_interrupt_request_new_buffer\n"); + + spin_lock_irqsave(&chip->lock, flags); + + err = lx_buffer_ask(chip, 0, is_capture, &needed, &freed, size_array); + snd_printdd(LXP "interrupt: needed %d, freed %d\n", needed, freed); + + unpack_pointer(buf, &buf_lo, &buf_hi); + err = lx_buffer_give(chip, 0, is_capture, period_bytes, buf_lo, buf_hi, + &buffer_index); + snd_printdd(LXP "interrupt: gave buffer index %x on %p (%d bytes)\n", + buffer_index, (void *)buf, period_bytes); + + lx_stream->frame_pos = next_pos; + spin_unlock_irqrestore(&chip->lock, flags); + + return err; +} + +void lx_tasklet_playback(unsigned long data) +{ + struct lx6464es *chip = (struct lx6464es *)data; + struct lx_stream *lx_stream = &chip->playback_stream; + int err; + + snd_printdd("->lx_tasklet_playback\n"); + + err = lx_interrupt_request_new_buffer(chip, lx_stream); + if (err < 0) + snd_printk(KERN_ERR LXP + "cannot request new buffer for playback\n"); + + snd_pcm_period_elapsed(lx_stream->stream); +} + +void lx_tasklet_capture(unsigned long data) +{ + struct lx6464es *chip = (struct lx6464es *)data; + struct lx_stream *lx_stream = &chip->capture_stream; + int err; + + snd_printdd("->lx_tasklet_capture\n"); + err = lx_interrupt_request_new_buffer(chip, lx_stream); + if (err < 0) + snd_printk(KERN_ERR LXP + "cannot request new buffer for capture\n"); + + snd_pcm_period_elapsed(lx_stream->stream); +} + + + +static int lx_interrupt_handle_audio_transfer(struct lx6464es *chip, + u64 notified_in_pipe_mask, + u64 notified_out_pipe_mask) +{ + int err = 0; + + if (notified_in_pipe_mask) { + snd_printdd(LXP "requesting audio transfer for capture\n"); + tasklet_hi_schedule(&chip->tasklet_capture); + } + + if (notified_out_pipe_mask) { + snd_printdd(LXP "requesting audio transfer for playback\n"); + tasklet_hi_schedule(&chip->tasklet_playback); + } + + return err; +} + + +irqreturn_t lx_interrupt(int irq, void *dev_id) +{ + struct lx6464es *chip = dev_id; + int async_pending, async_escmd; + u32 irqsrc; + + spin_lock(&chip->lock); + + snd_printdd("**************************************************\n"); + + if (!lx_interrupt_ack(chip, &irqsrc, &async_pending, &async_escmd)) { + spin_unlock(&chip->lock); + snd_printdd("IRQ_NONE\n"); + return IRQ_NONE; /* this device did not cause the interrupt */ + } + + if (irqsrc & MASK_SYS_STATUS_CMD_DONE) + goto exit; + +#if 0 + if (irqsrc & MASK_SYS_STATUS_EOBI) + snd_printdd(LXP "interrupt: EOBI\n"); + + if (irqsrc & MASK_SYS_STATUS_EOBO) + snd_printdd(LXP "interrupt: EOBO\n"); + + if (irqsrc & MASK_SYS_STATUS_URUN) + snd_printdd(LXP "interrupt: URUN\n"); + + if (irqsrc & MASK_SYS_STATUS_ORUN) + snd_printdd(LXP "interrupt: ORUN\n"); +#endif + + if (async_pending) { + u64 notified_in_pipe_mask = 0; + u64 notified_out_pipe_mask = 0; + int freq_changed; + int err; + + /* handle async events */ + err = lx_interrupt_handle_async_events(chip, irqsrc, + &freq_changed, + ¬ified_in_pipe_mask, + ¬ified_out_pipe_mask); + if (err) + snd_printk(KERN_ERR LXP + "error handling async events\n"); + + err = lx_interrupt_handle_audio_transfer(chip, + notified_in_pipe_mask, + notified_out_pipe_mask + ); + if (err) + snd_printk(KERN_ERR LXP + "error during audio transfer\n"); + } + + if (async_escmd) { +#if 0 + /* backdoor for ethersound commands + * + * for now, we do not need this + * + * */ + + snd_printdd("lx6464es: interrupt requests escmd handling\n"); +#endif + } + +exit: + spin_unlock(&chip->lock); + return IRQ_HANDLED; /* this device caused the interrupt */ +} + + +static void lx_irq_set(struct lx6464es *chip, int enable) +{ + u32 reg = lx_plx_reg_read(chip, ePLX_IRQCS); + + /* enable/disable interrupts + * + * Set the Doorbell and PCI interrupt enable bits + * + * */ + if (enable) + reg |= (IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB); + else + reg &= ~(IRQCS_ENABLE_PCIIRQ | IRQCS_ENABLE_PCIDB); + lx_plx_reg_write(chip, ePLX_IRQCS, reg); +} + +void lx_irq_enable(struct lx6464es *chip) +{ + snd_printdd("->lx_irq_enable\n"); + lx_irq_set(chip, 1); +} + +void lx_irq_disable(struct lx6464es *chip) +{ + snd_printdd("->lx_irq_disable\n"); + lx_irq_set(chip, 0); +} diff --git a/sound/pci/lx6464es/lx_core.h b/sound/pci/lx6464es/lx_core.h new file mode 100644 index 0000000..6bd9cbb --- /dev/null +++ b/sound/pci/lx6464es/lx_core.h @@ -0,0 +1,242 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * low-level interface + * + * Copyright (c) 2009 Tim Blechmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#ifndef LX_CORE_H +#define LX_CORE_H + +#include + +#include "lx_defs.h" + +#define REG_CRM_NUMBER 12 + +struct lx6464es; + +/* low-level register access */ + +/* dsp register access */ +enum { + eReg_BASE, + eReg_CSM, + eReg_CRM1, + eReg_CRM2, + eReg_CRM3, + eReg_CRM4, + eReg_CRM5, + eReg_CRM6, + eReg_CRM7, + eReg_CRM8, + eReg_CRM9, + eReg_CRM10, + eReg_CRM11, + eReg_CRM12, + + eReg_ICR, + eReg_CVR, + eReg_ISR, + eReg_RXHTXH, + eReg_RXMTXM, + eReg_RHLTXL, + eReg_RESETDSP, + + eReg_CSUF, + eReg_CSES, + eReg_CRESMSB, + eReg_CRESLSB, + eReg_ADMACESMSB, + eReg_ADMACESLSB, + eReg_CONFES, + + eMaxPortLx +}; + +unsigned long lx_dsp_reg_read(struct lx6464es *chip, int port); +void lx_dsp_reg_readbuf(struct lx6464es *chip, int port, u32 *data, u32 len); +void lx_dsp_reg_write(struct lx6464es *chip, int port, unsigned data); +void lx_dsp_reg_writebuf(struct lx6464es *chip, int port, const u32 *data, + u32 len); + +/* plx register access */ +enum { + ePLX_PCICR, + + ePLX_MBOX0, + ePLX_MBOX1, + ePLX_MBOX2, + ePLX_MBOX3, + ePLX_MBOX4, + ePLX_MBOX5, + ePLX_MBOX6, + ePLX_MBOX7, + + ePLX_L2PCIDB, + ePLX_IRQCS, + ePLX_CHIPSC, + + eMaxPort +}; + +unsigned long lx_plx_reg_read(struct lx6464es *chip, int port); +void lx_plx_reg_write(struct lx6464es *chip, int port, u32 data); + +/* rhm */ +struct lx_rmh { + u16 cmd_len; /* length of the command to send (WORDs) */ + u16 stat_len; /* length of the status received (WORDs) */ + u16 dsp_stat; /* status type, RMP_SSIZE_XXX */ + u16 cmd_idx; /* index of the command */ + u32 cmd[REG_CRM_NUMBER]; + u32 stat[REG_CRM_NUMBER]; +}; + + +/* low-level dsp access */ +int __devinit lx_dsp_get_version(struct lx6464es *chip, u32 *rdsp_version); +int lx_dsp_get_clock_frequency(struct lx6464es *chip, u32 *rfreq); +int lx_dsp_set_granularity(struct lx6464es *chip, u32 gran); +int lx_dsp_read_async_events(struct lx6464es *chip, u32 *data); +int lx_dsp_get_mac(struct lx6464es *chip, u8 *mac_address); + + +/* low-level pipe handling */ +int lx_pipe_allocate(struct lx6464es *chip, u32 pipe, int is_capture, + int channels); +int lx_pipe_release(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_sample_count(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *rsample_count); +int lx_pipe_state(struct lx6464es *chip, u32 pipe, int is_capture, u16 *rstate); +int lx_pipe_stop(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_start(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_pause(struct lx6464es *chip, u32 pipe, int is_capture); + +int lx_pipe_wait_for_start(struct lx6464es *chip, u32 pipe, int is_capture); +int lx_pipe_wait_for_idle(struct lx6464es *chip, u32 pipe, int is_capture); + +/* low-level stream handling */ +int lx_stream_set_format(struct lx6464es *chip, struct snd_pcm_runtime *runtime, + u32 pipe, int is_capture); +int lx_stream_state(struct lx6464es *chip, u32 pipe, int is_capture, + int *rstate); +int lx_stream_sample_position(struct lx6464es *chip, u32 pipe, int is_capture, + u64 *r_bytepos); + +int lx_stream_set_state(struct lx6464es *chip, u32 pipe, + int is_capture, enum stream_state_t state); + +static inline int lx_stream_start(struct lx6464es *chip, u32 pipe, + int is_capture) +{ + snd_printdd("->lx_stream_start\n"); + return lx_stream_set_state(chip, pipe, is_capture, SSTATE_RUN); +} + +static inline int lx_stream_pause(struct lx6464es *chip, u32 pipe, + int is_capture) +{ + snd_printdd("->lx_stream_pause\n"); + return lx_stream_set_state(chip, pipe, is_capture, SSTATE_PAUSE); +} + +static inline int lx_stream_stop(struct lx6464es *chip, u32 pipe, + int is_capture) +{ + snd_printdd("->lx_stream_stop\n"); + return lx_stream_set_state(chip, pipe, is_capture, SSTATE_STOP); +} + +/* low-level buffer handling */ +int lx_buffer_ask(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_needed, u32 *r_freed, u32 *size_array); +int lx_buffer_give(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_size, u32 buf_address_lo, u32 buf_address_hi, + u32 *r_buffer_index); +int lx_buffer_free(struct lx6464es *chip, u32 pipe, int is_capture, + u32 *r_buffer_size); +int lx_buffer_cancel(struct lx6464es *chip, u32 pipe, int is_capture, + u32 buffer_index); + +/* low-level gain/peak handling */ +int lx_level_unmute(struct lx6464es *chip, int is_capture, int unmute); +int lx_level_peaks(struct lx6464es *chip, int is_capture, int channels, + u32 *r_levels); + + +/* interrupt handling */ +irqreturn_t lx_interrupt(int irq, void *dev_id); +void lx_irq_enable(struct lx6464es *chip); +void lx_irq_disable(struct lx6464es *chip); + +void lx_tasklet_capture(unsigned long data); +void lx_tasklet_playback(unsigned long data); + + +/* Stream Format Header Defines (for LIN and IEEE754) */ +#define HEADER_FMT_BASE HEADER_FMT_BASE_LIN +#define HEADER_FMT_BASE_LIN 0xFED00000 +#define HEADER_FMT_BASE_FLOAT 0xFAD00000 +#define HEADER_FMT_MONO 0x00000080 /* bit 23 in header_lo. WARNING: old + * bit 22 is ignored in float + * format */ +#define HEADER_FMT_INTEL 0x00008000 +#define HEADER_FMT_16BITS 0x00002000 +#define HEADER_FMT_24BITS 0x00004000 +#define HEADER_FMT_UPTO11 0x00000200 /* frequency is less or equ. to 11k. + * */ +#define HEADER_FMT_UPTO32 0x00000100 /* frequency is over 11k and less + * then 32k.*/ + + +#define BIT_FMP_HEADER 23 +#define BIT_FMP_SD 22 +#define BIT_FMP_MULTICHANNEL 19 + +#define START_STATE 1 +#define PAUSE_STATE 0 + + + + + +/* from PcxAll_e.h */ +/* Start/Pause condition for pipes (PCXStartPipe, PCXPausePipe) */ +#define START_PAUSE_IMMEDIATE 0 +#define START_PAUSE_ON_SYNCHRO 1 +#define START_PAUSE_ON_TIME_CODE 2 + + +/* Pipe / Stream state */ +#define START_STATE 1 +#define PAUSE_STATE 0 + +static inline void unpack_pointer(dma_addr_t ptr, u32 *r_low, u32 *r_high) +{ + *r_low = (u32)(ptr & 0xffffffff); +#if BITS_PER_LONG == 32 + *r_high = 0; +#else + *r_high = (u32)((u64)ptr>>32); +#endif +} + +#endif /* LX_CORE_H */ diff --git a/sound/pci/lx6464es/lx_defs.h b/sound/pci/lx6464es/lx_defs.h new file mode 100644 index 0000000..49d36bd --- /dev/null +++ b/sound/pci/lx6464es/lx_defs.h @@ -0,0 +1,376 @@ +/* -*- linux-c -*- * + * + * ALSA driver for the digigram lx6464es interface + * adapted upstream headers + * + * Copyright (c) 2009 Tim Blechmann + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +#ifndef LX_DEFS_H +#define LX_DEFS_H + +/* code adapted from ethersound.h */ +#define XES_FREQ_COUNT8_MASK 0x00001FFF /* compteur 25MHz entre 8 ech. */ +#define XES_FREQ_COUNT8_44_MIN 0x00001288 /* 25M / + * [ 44k - ( 44.1k + 48k ) / 2 ] + * * 8 */ +#define XES_FREQ_COUNT8_44_MAX 0x000010F0 /* 25M / [ ( 44.1k + 48k ) / 2 ] + * * 8 */ +#define XES_FREQ_COUNT8_48_MAX 0x00000F08 /* 25M / + * [ 48k + ( 44.1k + 48k ) / 2 ] + * * 8 */ + +/* code adapted from LXES_registers.h */ + +#define IOCR_OUTPUTS_OFFSET 0 /* (rw) offset for the number of OUTs in the + * ConfES register. */ +#define IOCR_INPUTS_OFFSET 8 /* (rw) offset for the number of INs in the + * ConfES register. */ +#define FREQ_RATIO_OFFSET 19 /* (rw) offset for frequency ratio in the + * ConfES register. */ +#define FREQ_RATIO_SINGLE_MODE 0x01 /* value for single mode frequency ratio: + * sample rate = frequency rate. */ + +#define CONFES_READ_PART_MASK 0x00070000 +#define CONFES_WRITE_PART_MASK 0x00F80000 + +/* code adapted from if_drv_mb.h */ + +#define MASK_SYS_STATUS_ERROR (1L << 31) /* events that lead to a PCI irq if + * not yet pending */ +#define MASK_SYS_STATUS_URUN (1L << 30) +#define MASK_SYS_STATUS_ORUN (1L << 29) +#define MASK_SYS_STATUS_EOBO (1L << 28) +#define MASK_SYS_STATUS_EOBI (1L << 27) +#define MASK_SYS_STATUS_FREQ (1L << 26) +#define MASK_SYS_STATUS_ESA (1L << 25) /* reserved, this is set by the + * XES */ +#define MASK_SYS_STATUS_TIMER (1L << 24) + +#define MASK_SYS_ASYNC_EVENTS (MASK_SYS_STATUS_ERROR | \ + MASK_SYS_STATUS_URUN | \ + MASK_SYS_STATUS_ORUN | \ + MASK_SYS_STATUS_EOBO | \ + MASK_SYS_STATUS_EOBI | \ + MASK_SYS_STATUS_FREQ | \ + MASK_SYS_STATUS_ESA) + +#define MASK_SYS_PCI_EVENTS (MASK_SYS_ASYNC_EVENTS | \ + MASK_SYS_STATUS_TIMER) + +#define MASK_SYS_TIMER_COUNT 0x0000FFFF + +#define MASK_SYS_STATUS_EOT_PLX (1L << 22) /* event that remains + * internal: reserved fo end + * of plx dma */ +#define MASK_SYS_STATUS_XES (1L << 21) /* event that remains + * internal: pending XES + * IRQ */ +#define MASK_SYS_STATUS_CMD_DONE (1L << 20) /* alternate command + * management: notify driver + * instead of polling */ + + +#define MAX_STREAM_BUFFER 5 /* max amount of stream buffers. */ + +#define MICROBLAZE_IBL_MIN 32 +#define MICROBLAZE_IBL_DEFAULT 128 +#define MICROBLAZE_IBL_MAX 512 +/* #define MASK_GRANULARITY (2*MICROBLAZE_IBL_MAX-1) */ + + + +/* command opcodes, see reference for details */ + +/* + the capture bit position in the object_id field in driver commands + depends upon the number of managed channels. For now, 64 IN + 64 OUT are + supported. HOwever, the communication protocol forsees 1024 channels, hence + bit 10 indicates a capture (input) object). +*/ +#define ID_IS_CAPTURE (1L << 10) +#define ID_OFFSET 13 /* object ID is at the 13th bit in the + * 1st command word.*/ +#define ID_CH_MASK 0x3F +#define OPCODE_OFFSET 24 /* offset of the command opcode in the first + * command word.*/ + +enum cmd_mb_opcodes { + CMD_00_INFO_DEBUG = 0x00, + CMD_01_GET_SYS_CFG = 0x01, + CMD_02_SET_GRANULARITY = 0x02, + CMD_03_SET_TIMER_IRQ = 0x03, + CMD_04_GET_EVENT = 0x04, + CMD_05_GET_PIPES = 0x05, + + CMD_06_ALLOCATE_PIPE = 0x06, + CMD_07_RELEASE_PIPE = 0x07, + CMD_08_ASK_BUFFERS = 0x08, + CMD_09_STOP_PIPE = 0x09, + CMD_0A_GET_PIPE_SPL_COUNT = 0x0a, + CMD_0B_TOGGLE_PIPE_STATE = 0x0b, + + CMD_0C_DEF_STREAM = 0x0c, + CMD_0D_SET_MUTE = 0x0d, + CMD_0E_GET_STREAM_SPL_COUNT = 0x0e, + CMD_0F_UPDATE_BUFFER = 0x0f, + CMD_10_GET_BUFFER = 0x10, + CMD_11_CANCEL_BUFFER = 0x11, + CMD_12_GET_PEAK = 0x12, + CMD_13_SET_STREAM_STATE = 0x13, + CMD_14_INVALID = 0x14, +}; + +/* pipe states */ +enum pipe_state_t { + PSTATE_IDLE = 0, /* the pipe is not processed in the XES_IRQ + * (free or stopped, or paused). */ + PSTATE_RUN = 1, /* sustained play/record state. */ + PSTATE_PURGE = 2, /* the ES channels are now off, render pipes do + * not DMA, record pipe do a last DMA. */ + PSTATE_ACQUIRE = 3, /* the ES channels are now on, render pipes do + * not yet increase their sample count, record + * pipes do not DMA. */ + PSTATE_CLOSING = 4, /* the pipe is releasing, and may not yet + * receive an "alloc" command. */ +}; + +/* stream states */ +enum stream_state_t { + SSTATE_STOP = 0x00, /* setting to stop resets the stream spl + * count.*/ + SSTATE_RUN = (0x01 << 0), /* start DMA and spl count handling. */ + SSTATE_PAUSE = (0x01 << 1), /* pause DMA and spl count handling. */ +}; + +/* buffer flags */ +enum buffer_flags { + BF_VALID = 0x80, /* set if the buffer is valid, clear if free.*/ + BF_CURRENT = 0x40, /* set if this is the current buffer (there is + * always a current buffer).*/ + BF_NOTIFY_EOB = 0x20, /* set if this buffer must cause a PCI event + * when finished.*/ + BF_CIRCULAR = 0x10, /* set if buffer[1] must be copied to buffer[0] + * by the end of this buffer.*/ + BF_64BITS_ADR = 0x08, /* set if the hi part of the address is valid.*/ + BF_xx = 0x04, /* future extension.*/ + BF_EOB = 0x02, /* set if finished, but not yet free.*/ + BF_PAUSE = 0x01, /* pause stream at buffer end.*/ + BF_ZERO = 0x00, /* no flags (init).*/ +}; + +/** +* Stream Flags definitions +*/ +enum stream_flags { + SF_ZERO = 0x00000000, /* no flags (stream invalid). */ + SF_VALID = 0x10000000, /* the stream has a valid DMA_conf + * info (setstreamformat). */ + SF_XRUN = 0x20000000, /* the stream is un x-run state. */ + SF_START = 0x40000000, /* the DMA is running.*/ + SF_ASIO = 0x80000000, /* ASIO.*/ +}; + + +#define MASK_SPL_COUNT_HI 0x00FFFFFF /* 4 MSBits are status bits */ +#define PSTATE_OFFSET 28 /* 4 MSBits are status bits */ + + +#define MASK_STREAM_HAS_MAPPING (1L << 12) +#define MASK_STREAM_IS_ASIO (1L << 9) +#define STREAM_FMT_OFFSET 10 /* the stream fmt bits start at the 10th + * bit in the command word. */ + +#define STREAM_FMT_16b 0x02 +#define STREAM_FMT_intel 0x01 + +#define FREQ_FIELD_OFFSET 15 /* offset of the freq field in the response + * word */ + +#define BUFF_FLAGS_OFFSET 24 /* offset of the buffer flags in the + * response word. */ +#define MASK_DATA_SIZE 0x00FFFFFF /* this must match the field size of + * datasize in the buffer_t structure. */ + +#define MASK_BUFFER_ID 0xFF /* the cancel command awaits a buffer ID, + * may be 0xFF for "current". */ + + +/* code adapted from PcxErr_e.h */ + +/* Bits masks */ + +#define ERROR_MASK 0x8000 + +#define SOURCE_MASK 0x7800 + +#define E_SOURCE_BOARD 0x4000 /* 8 >> 1 */ +#define E_SOURCE_DRV 0x2000 /* 4 >> 1 */ +#define E_SOURCE_API 0x1000 /* 2 >> 1 */ +/* Error tools */ +#define E_SOURCE_TOOLS 0x0800 /* 1 >> 1 */ +/* Error pcxaudio */ +#define E_SOURCE_AUDIO 0x1800 /* 3 >> 1 */ +/* Error virtual pcx */ +#define E_SOURCE_VPCX 0x2800 /* 5 >> 1 */ +/* Error dispatcher */ +#define E_SOURCE_DISPATCHER 0x3000 /* 6 >> 1 */ +/* Error from CobraNet firmware */ +#define E_SOURCE_COBRANET 0x3800 /* 7 >> 1 */ + +#define E_SOURCE_USER 0x7800 + +#define CLASS_MASK 0x0700 + +#define CODE_MASK 0x00FF + +/* Bits values */ + +/* Values for the error/warning bit */ +#define ERROR_VALUE 0x8000 +#define WARNING_VALUE 0x0000 + +/* Class values */ +#define E_CLASS_GENERAL 0x0000 +#define E_CLASS_INVALID_CMD 0x0100 +#define E_CLASS_INVALID_STD_OBJECT 0x0200 +#define E_CLASS_RSRC_IMPOSSIBLE 0x0300 +#define E_CLASS_WRONG_CONTEXT 0x0400 +#define E_CLASS_BAD_SPECIFIC_PARAMETER 0x0500 +#define E_CLASS_REAL_TIME_ERROR 0x0600 +#define E_CLASS_DIRECTSHOW 0x0700 +#define E_CLASS_FREE 0x0700 + + +/* Complete DRV error code for the general class */ +#define ED_GN (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_GENERAL) +#define ED_CONCURRENCY (ED_GN | 0x01) +#define ED_DSP_CRASHED (ED_GN | 0x02) +#define ED_UNKNOWN_BOARD (ED_GN | 0x03) +#define ED_NOT_INSTALLED (ED_GN | 0x04) +#define ED_CANNOT_OPEN_SVC_MANAGER (ED_GN | 0x05) +#define ED_CANNOT_READ_REGISTRY (ED_GN | 0x06) +#define ED_DSP_VERSION_MISMATCH (ED_GN | 0x07) +#define ED_UNAVAILABLE_FEATURE (ED_GN | 0x08) +#define ED_CANCELLED (ED_GN | 0x09) +#define ED_NO_RESPONSE_AT_IRQA (ED_GN | 0x10) +#define ED_INVALID_ADDRESS (ED_GN | 0x11) +#define ED_DSP_CORRUPTED (ED_GN | 0x12) +#define ED_PENDING_OPERATION (ED_GN | 0x13) +#define ED_NET_ALLOCATE_MEMORY_IMPOSSIBLE (ED_GN | 0x14) +#define ED_NET_REGISTER_ERROR (ED_GN | 0x15) +#define ED_NET_THREAD_ERROR (ED_GN | 0x16) +#define ED_NET_OPEN_ERROR (ED_GN | 0x17) +#define ED_NET_CLOSE_ERROR (ED_GN | 0x18) +#define ED_NET_NO_MORE_PACKET (ED_GN | 0x19) +#define ED_NET_NO_MORE_BUFFER (ED_GN | 0x1A) +#define ED_NET_SEND_ERROR (ED_GN | 0x1B) +#define ED_NET_RECEIVE_ERROR (ED_GN | 0x1C) +#define ED_NET_WRONG_MSG_SIZE (ED_GN | 0x1D) +#define ED_NET_WAIT_ERROR (ED_GN | 0x1E) +#define ED_NET_EEPROM_ERROR (ED_GN | 0x1F) +#define ED_INVALID_RS232_COM_NUMBER (ED_GN | 0x20) +#define ED_INVALID_RS232_INIT (ED_GN | 0x21) +#define ED_FILE_ERROR (ED_GN | 0x22) +#define ED_INVALID_GPIO_CMD (ED_GN | 0x23) +#define ED_RS232_ALREADY_OPENED (ED_GN | 0x24) +#define ED_RS232_NOT_OPENED (ED_GN | 0x25) +#define ED_GPIO_ALREADY_OPENED (ED_GN | 0x26) +#define ED_GPIO_NOT_OPENED (ED_GN | 0x27) +#define ED_REGISTRY_ERROR (ED_GN | 0x28) /* <- NCX */ +#define ED_INVALID_SERVICE (ED_GN | 0x29) /* <- NCX */ + +#define ED_READ_FILE_ALREADY_OPENED (ED_GN | 0x2a) /* <- Decalage + * pour RCX + * (old 0x28) + * */ +#define ED_READ_FILE_INVALID_COMMAND (ED_GN | 0x2b) /* ~ */ +#define ED_READ_FILE_INVALID_PARAMETER (ED_GN | 0x2c) /* ~ */ +#define ED_READ_FILE_ALREADY_CLOSED (ED_GN | 0x2d) /* ~ */ +#define ED_READ_FILE_NO_INFORMATION (ED_GN | 0x2e) /* ~ */ +#define ED_READ_FILE_INVALID_HANDLE (ED_GN | 0x2f) /* ~ */ +#define ED_READ_FILE_END_OF_FILE (ED_GN | 0x30) /* ~ */ +#define ED_READ_FILE_ERROR (ED_GN | 0x31) /* ~ */ + +#define ED_DSP_CRASHED_EXC_DSPSTACK_OVERFLOW (ED_GN | 0x32) /* <- Decalage pour + * PCX (old 0x14) */ +#define ED_DSP_CRASHED_EXC_SYSSTACK_OVERFLOW (ED_GN | 0x33) /* ~ */ +#define ED_DSP_CRASHED_EXC_ILLEGAL (ED_GN | 0x34) /* ~ */ +#define ED_DSP_CRASHED_EXC_TIMER_REENTRY (ED_GN | 0x35) /* ~ */ +#define ED_DSP_CRASHED_EXC_FATAL_ERROR (ED_GN | 0x36) /* ~ */ + +#define ED_FLASH_PCCARD_NOT_PRESENT (ED_GN | 0x37) + +#define ED_NO_CURRENT_CLOCK (ED_GN | 0x38) + +/* Complete DRV error code for real time class */ +#define ED_RT (ERROR_VALUE | E_SOURCE_DRV | E_CLASS_REAL_TIME_ERROR) +#define ED_DSP_TIMED_OUT (ED_RT | 0x01) +#define ED_DSP_CHK_TIMED_OUT (ED_RT | 0x02) +#define ED_STREAM_OVERRUN (ED_RT | 0x03) +#define ED_DSP_BUSY (ED_RT | 0x04) +#define ED_DSP_SEMAPHORE_TIME_OUT (ED_RT | 0x05) +#define ED_BOARD_TIME_OUT (ED_RT | 0x06) +#define ED_XILINX_ERROR (ED_RT | 0x07) +#define ED_COBRANET_ITF_NOT_RESPONDING (ED_RT | 0x08) + +/* Complete BOARD error code for the invaid standard object class */ +#define EB_ISO (ERROR_VALUE | E_SOURCE_BOARD | \ + E_CLASS_INVALID_STD_OBJECT) +#define EB_INVALID_EFFECT (EB_ISO | 0x00) +#define EB_INVALID_PIPE (EB_ISO | 0x40) +#define EB_INVALID_STREAM (EB_ISO | 0x80) +#define EB_INVALID_AUDIO (EB_ISO | 0xC0) + +/* Complete BOARD error code for impossible resource allocation class */ +#define EB_RI (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_RSRC_IMPOSSIBLE) +#define EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE (EB_RI | 0x01) +#define EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE (EB_RI | 0x02) + +#define EB_ALLOCATE_MEM_STREAM_IMPOSSIBLE \ + EB_ALLOCATE_ALL_STREAM_TRANSFERT_BUFFERS_IMPOSSIBLE +#define EB_ALLOCATE_MEM_PIPE_IMPOSSIBLE \ + EB_ALLOCATE_PIPE_SAMPLE_BUFFER_IMPOSSIBLE + +#define EB_ALLOCATE_DIFFERED_CMD_IMPOSSIBLE (EB_RI | 0x03) +#define EB_TOO_MANY_DIFFERED_CMD (EB_RI | 0x04) +#define EB_RBUFFERS_TABLE_OVERFLOW (EB_RI | 0x05) +#define EB_ALLOCATE_EFFECTS_IMPOSSIBLE (EB_RI | 0x08) +#define EB_ALLOCATE_EFFECT_POS_IMPOSSIBLE (EB_RI | 0x09) +#define EB_RBUFFER_NOT_AVAILABLE (EB_RI | 0x0A) +#define EB_ALLOCATE_CONTEXT_LIII_IMPOSSIBLE (EB_RI | 0x0B) +#define EB_STATUS_DIALOG_IMPOSSIBLE (EB_RI | 0x1D) +#define EB_CONTROL_CMD_IMPOSSIBLE (EB_RI | 0x1E) +#define EB_STATUS_SEND_IMPOSSIBLE (EB_RI | 0x1F) +#define EB_ALLOCATE_PIPE_IMPOSSIBLE (EB_RI | 0x40) +#define EB_ALLOCATE_STREAM_IMPOSSIBLE (EB_RI | 0x80) +#define EB_ALLOCATE_AUDIO_IMPOSSIBLE (EB_RI | 0xC0) + +/* Complete BOARD error code for wrong call context class */ +#define EB_WCC (ERROR_VALUE | E_SOURCE_BOARD | E_CLASS_WRONG_CONTEXT) +#define EB_CMD_REFUSED (EB_WCC | 0x00) +#define EB_START_STREAM_REFUSED (EB_WCC | 0xFC) +#define EB_SPC_REFUSED (EB_WCC | 0xFD) +#define EB_CSN_REFUSED (EB_WCC | 0xFE) +#define EB_CSE_REFUSED (EB_WCC | 0xFF) + + + + +#endif /* LX_DEFS_H */ -- cgit v0.10.2 From 7852fd08fdc78bf43150137bdbfdfdccdefffe0f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Apr 2009 12:25:52 +0200 Subject: ALSA: lx6464es - Use snd_card_create() Use snd_card_create() instead of the obsoleted snd_card_new(). Signed-off-by: Takashi Iwai diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c index 7bc8b8c..870bfc5 100644 --- a/sound/pci/lx6464es/lx6464es.c +++ b/sound/pci/lx6464es/lx6464es.c @@ -1091,9 +1091,9 @@ static int __devinit snd_lx6464es_probe(struct pci_dev *pci, return -ENOENT; } - card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0); - if (card == NULL) - return -ENOMEM; + err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card); + if (err < 0) + return err; err = snd_lx6464es_create(card, pci, &chip); if (err < 0) { -- cgit v0.10.2 From d7dee4d7744d039bf28e4f6d4f5674f44820265a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Apr 2009 12:27:12 +0200 Subject: ALSA: lx6464es - Disable lx_message_send() Disable lx_message_send() function temporarily as it's not used anywhere. Signed-off-by: Takashi Iwai diff --git a/sound/pci/lx6464es/lx_core.c b/sound/pci/lx6464es/lx_core.c index a9f8f88..5812780 100644 --- a/sound/pci/lx6464es/lx_core.c +++ b/sound/pci/lx6464es/lx_core.c @@ -314,6 +314,7 @@ static inline void lx_message_dump(struct lx_rmh *rmh) #define XILINX_POLL_NO_SLEEP 100 #define XILINX_POLL_ITERATIONS 150 +#if 0 /* not used now */ static int lx_message_send(struct lx6464es *chip, struct lx_rmh *rmh) { u32 reg = ED_DSP_TIMED_OUT; @@ -404,6 +405,7 @@ polling_successful: lx_message_dump(rmh); return 0; } +#endif /* not used now */ static int lx_message_send_atomic(struct lx6464es *chip, struct lx_rmh *rmh) { -- cgit v0.10.2 From 8338c300642f67af5a8cc279ca5e088c7055b7eb Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 14 Apr 2009 12:30:36 +0200 Subject: ALSA: Add missing description of lx6464es to ALSA-Configuration.txt Signed-off-by: Takashi Iwai diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 012858d..dfd266e 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -1093,6 +1093,13 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. This module supports multiple cards. The driver requires the firmware loader support on kernel. + Module snd-lx6464es + ------------------- + + Module for Digigram LX6464ES boards + + This module supports multiple cards. + Module snd-maestro3 ------------------- -- cgit v0.10.2 From 7e05575c422d45f393c2d9b5900e97a30bf69bea Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 14 Apr 2009 12:12:29 +0900 Subject: x86: calgary: remove IOMMU_DEBUG CONFIG_IOMMU_DEBUG has depends on CONFIG_GART_IOMMU: config IOMMU_DEBUG bool "Enable IOMMU debugging" depends on GART_IOMMU && DEBUG_KERNEL depends on X86_64 So it's not useful to have CONFIG_IOMMU_DEBUG in Calgary IOMMU code, which does the extra checking of the bitmap space management. And Calgary uses the iommu helper for the bitmap space management now so it would be better to have the extra checking feature in the iommu helper rather than Calgary code (if necessary). Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Cc: Joerg Roedel Cc: alexisb@us.ibm.com LKML-Reference: <20090414120827G.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 755c21e..971a3be 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = { static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; -/* enable this to stress test the chip's TCE cache */ -#ifdef CONFIG_IOMMU_DEBUG -static int debugging = 1; - -static inline unsigned long verify_bit_range(unsigned long* bitmap, - int expected, unsigned long start, unsigned long end) -{ - unsigned long idx = start; - - BUG_ON(start >= end); - - while (idx < end) { - if (!!test_bit(idx, bitmap) != expected) - return idx; - ++idx; - } - - /* all bits have the expected value */ - return ~0UL; -} -#else /* debugging is disabled */ -static int debugging; - -static inline unsigned long verify_bit_range(unsigned long* bitmap, - int expected, unsigned long start, unsigned long end) -{ - return ~0UL; -} - -#endif /* CONFIG_IOMMU_DEBUG */ - static inline int translation_enabled(struct iommu_table *tbl) { /* only PHBs with translation enabled have an IOMMU table */ @@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, { unsigned long index; unsigned long end; - unsigned long badbit; unsigned long flags; index = start_addr >> PAGE_SHIFT; @@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, spin_lock_irqsave(&tbl->it_lock, flags); - badbit = verify_bit_range(tbl->it_map, 0, index, end); - if (badbit != ~0UL) { - if (printk_ratelimit()) - printk(KERN_ERR "Calgary: entry already allocated at " - "0x%lx tbl %p dma 0x%lx npages %u\n", - badbit, tbl, start_addr, npages); - } - iommu_area_reserve(tbl->it_map, index, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); @@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned int npages) { unsigned long entry; - unsigned long badbit; unsigned long badend; unsigned long flags; @@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, spin_lock_irqsave(&tbl->it_lock, flags); - badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages); - if (badbit != ~0UL) { - if (printk_ratelimit()) - printk(KERN_ERR "Calgary: bit is off at 0x%lx " - "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", - badbit, tbl, dma_addr, entry, npages); - } - iommu_area_free(tbl->it_map, entry, npages); spin_unlock_irqrestore(&tbl->it_lock, flags); @@ -1488,9 +1439,8 @@ void __init detect_calgary(void) iommu_detected = 1; calgary_detected = 1; printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); - printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " - "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, - debugging ? "enabled" : "disabled"); + printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", + specified_table_size); /* swiotlb for devices that aren't behind the Calgary. */ if (max_pfn > MAX_DMA32_PFN) -- cgit v0.10.2 From c282866101bfde888a44da3babd2f9ab265ca6f9 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Sat, 4 Apr 2009 14:48:32 +0200 Subject: ALSA: sc6000: add support for SC-6600 and SC-7000 Add support for later cards based on CompuMedia ASC-9408 chipsets. These cards were produced by Gallant. This patch make the OSS aedsp16 driver redundant. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index c5c9a92..64129bf 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -177,15 +177,18 @@ config SND_ES18XX will be called snd-es18xx. config SND_SC6000 - tristate "Gallant SC-6000, Audio Excel DSP 16" + tristate "Gallant SC-6000/6600/7000 and Audio Excel DSP 16" depends on HAS_IOPORT select SND_WSS_LIB select SND_OPL3_LIB select SND_MPU401_UART help - Say Y here to include support for Gallant SC-6000 card and clones: + Say Y here to include support for Gallant SC-6000, SC-6600, SC-7000 + cards and clones: Audio Excel DSP 16 and Zoltrix AV302. + These cards are based on CompuMedia ASC-9308 or ASC-9408 chips. + To compile this driver as a module, choose M here: the module will be called snd-sc6000. diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index 7820106..983ab7e 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -2,6 +2,8 @@ * Driver for Gallant SC-6000 soundcard. This card is also known as * Audio Excel DSP 16 or Zoltrix AV302. * These cards use CompuMedia ASC-9308 chip + AD1848 codec. + * SC-6600 and SC-7000 cards are also supported. They are based on + * CompuMedia ASC-9408 chip and CS4231 codec. * * Copyright (C) 2007 Krzysztof Helt * @@ -191,7 +193,7 @@ static __devinit unsigned char sc6000_mpu_irq_to_softcfg(int mpu_irq) return val; } -static __devinit int sc6000_wait_data(char __iomem *vport) +static int sc6000_wait_data(char __iomem *vport) { int loop = 1000; unsigned char val = 0; @@ -206,7 +208,7 @@ static __devinit int sc6000_wait_data(char __iomem *vport) return -EAGAIN; } -static __devinit int sc6000_read(char __iomem *vport) +static int sc6000_read(char __iomem *vport) { if (sc6000_wait_data(vport)) return -EBUSY; @@ -215,7 +217,7 @@ static __devinit int sc6000_read(char __iomem *vport) } -static __devinit int sc6000_write(char __iomem *vport, int cmd) +static int sc6000_write(char __iomem *vport, int cmd) { unsigned char val; int loop = 500000; @@ -276,8 +278,33 @@ static int __devinit sc6000_dsp_reset(char __iomem *vport) } /* detection and initialization */ -static int __devinit sc6000_cfg_write(char __iomem *vport, - unsigned char softcfg) +static int __devinit sc6000_hw_cfg_write(char __iomem *vport, const int *cfg) +{ + if (sc6000_write(vport, COMMAND_6C) < 0) { + snd_printk(KERN_WARNING "CMD 0x%x: failed!\n", COMMAND_6C); + return -EIO; + } + if (sc6000_write(vport, COMMAND_5C) < 0) { + snd_printk(KERN_ERR "CMD 0x%x: failed!\n", COMMAND_5C); + return -EIO; + } + if (sc6000_write(vport, cfg[0]) < 0) { + snd_printk(KERN_ERR "DATA 0x%x: failed!\n", cfg[0]); + return -EIO; + } + if (sc6000_write(vport, cfg[1]) < 0) { + snd_printk(KERN_ERR "DATA 0x%x: failed!\n", cfg[1]); + return -EIO; + } + if (sc6000_write(vport, COMMAND_C5) < 0) { + snd_printk(KERN_ERR "CMD 0x%x: failed!\n", COMMAND_C5); + return -EIO; + } + + return 0; +} + +static int sc6000_cfg_write(char __iomem *vport, unsigned char softcfg) { if (sc6000_write(vport, WRITE_MDIRQ_CFG)) { @@ -291,7 +318,7 @@ static int __devinit sc6000_cfg_write(char __iomem *vport, return 0; } -static int __devinit sc6000_setup_board(char __iomem *vport, int config) +static int sc6000_setup_board(char __iomem *vport, int config) { int loop = 10; @@ -334,16 +361,38 @@ static int __devinit sc6000_init_mss(char __iomem *vport, int config, return 0; } -static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma, - char __iomem *vmss_port, int mpu_irq) +static void __devinit sc6000_hw_cfg_encode(char __iomem *vport, int *cfg, + long xport, long xmpu, + long xmss_port) +{ + cfg[0] = 0; + cfg[1] = 0; + if (xport == 0x240) + cfg[0] |= 1; + if (xmpu != SNDRV_AUTO_PORT) { + cfg[0] |= (xmpu & 0x30) >> 2; + cfg[1] |= 0x20; + } + if (xmss_port == 0xe80) + cfg[0] |= 0x10; + cfg[0] |= 0x40; /* always set */ + cfg[1] |= 0x80; /* enable WSS system */ + cfg[1] &= ~0x40; /* disable IDE */ + snd_printd("hw cfg %x, %x\n", cfg[0], cfg[1]); +} + +static int __devinit sc6000_init_board(char __iomem *vport, + char __iomem *vmss_port, int dev) { char answer[15]; char version[2]; - int mss_config = sc6000_irq_to_softcfg(irq) | - sc6000_dma_to_softcfg(dma); + int mss_config = sc6000_irq_to_softcfg(irq[dev]) | + sc6000_dma_to_softcfg(dma[dev]); int config = mss_config | - sc6000_mpu_irq_to_softcfg(mpu_irq); + sc6000_mpu_irq_to_softcfg(mpu_irq[dev]); int err; + int cfg[2]; + int old = 0; err = sc6000_dsp_reset(vport); if (err < 0) { @@ -360,7 +409,6 @@ static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma, /* * My SC-6000 card return "SC-6000" in DSPCopyright, so * if we have something different, we have to be warned. - * Mine returns "SC-6000A " - KH */ if (strncmp("SC-6000", answer, 7)) snd_printk(KERN_WARNING "Warning: non SC-6000 audio card!\n"); @@ -372,13 +420,29 @@ static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma, printk(KERN_INFO PFX "Detected model: %s, DSP version %d.%d\n", answer, version[0], version[1]); - /* - * 0x0A == (IRQ 7, DMA 1, MIRQ 0) - */ - err = sc6000_cfg_write(vport, 0x0a); + /* set configuration */ + sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev], + mss_port[dev]); + if (sc6000_hw_cfg_write(vport, cfg) < 0) { + snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n"); + return -EIO; + } + err = sc6000_setup_board(vport, config); if (err < 0) { - snd_printk(KERN_ERR "sc6000_cfg_write: failed!\n"); - return -EFAULT; + snd_printk(KERN_ERR "sc6000_setup_board: failed!\n"); + return -ENODEV; + } + + sc6000_dsp_reset(vport); + sc6000_write(vport, COMMAND_5C); + if (sc6000_read(vport) < 0) + old = 1; + sc6000_dsp_reset(vport); + + if (!old) { + sc6000_write(vport, COMMAND_60); + sc6000_write(vport, 0x02); + sc6000_dsp_reset(vport); } err = sc6000_setup_board(vport, config); @@ -386,10 +450,9 @@ static int __devinit sc6000_init_board(char __iomem *vport, int irq, int dma, snd_printk(KERN_ERR "sc6000_setup_board: failed!\n"); return -ENODEV; } - err = sc6000_init_mss(vport, config, vmss_port, mss_config); if (err < 0) { - snd_printk(KERN_ERR "Can not initialize " + snd_printk(KERN_ERR "Cannot initialize " "Microsoft Sound System mode.\n"); return -ENODEV; } @@ -485,14 +548,16 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev) struct snd_card *card; struct snd_wss *chip; struct snd_opl3 *opl3; - char __iomem *vport; + char __iomem **vport; char __iomem *vmss_port; - err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card); + err = snd_card_create(index[dev], id[dev], THIS_MODULE, sizeof(vport), + &card); if (err < 0) return err; + vport = card->private_data; if (xirq == SNDRV_AUTO_IRQ) { xirq = snd_legacy_find_free_irq(possible_irqs); if (xirq < 0) { @@ -517,8 +582,8 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev) err = -EBUSY; goto err_exit; } - vport = devm_ioport_map(devptr, port[dev], 0x10); - if (!vport) { + *vport = devm_ioport_map(devptr, port[dev], 0x10); + if (*vport == NULL) { snd_printk(KERN_ERR PFX "I/O port cannot be iomaped.\n"); err = -EBUSY; @@ -533,7 +598,7 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev) goto err_unmap1; } vmss_port = devm_ioport_map(devptr, mss_port[dev], 4); - if (!vport) { + if (!vmss_port) { snd_printk(KERN_ERR PFX "MSS port I/O cannot be iomaped.\n"); err = -EBUSY; @@ -544,7 +609,7 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev) port[dev], xirq, xdma, mpu_irq[dev] == SNDRV_AUTO_IRQ ? 0 : mpu_irq[dev]); - err = sc6000_init_board(vport, xirq, xdma, vmss_port, mpu_irq[dev]); + err = sc6000_init_board(*vport, vmss_port, dev); if (err < 0) goto err_unmap2; @@ -552,7 +617,6 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev) WSS_HW_DETECT, 0, &chip); if (err < 0) goto err_unmap2; - card->private_data = chip; err = snd_wss_pcm(chip, 0, NULL); if (err < 0) { @@ -608,6 +672,7 @@ static int __devinit snd_sc6000_probe(struct device *devptr, unsigned int dev) return 0; err_unmap2: + sc6000_setup_board(*vport, 0); release_region(mss_port[dev], 4); err_unmap1: release_region(port[dev], 0x10); @@ -618,11 +683,17 @@ err_exit: static int __devexit snd_sc6000_remove(struct device *devptr, unsigned int dev) { + struct snd_card *card = dev_get_drvdata(devptr); + char __iomem **vport = card->private_data; + + if (sc6000_setup_board(*vport, 0) < 0) + snd_printk(KERN_WARNING "sc6000_setup_board failed on exit!\n"); + release_region(port[dev], 0x10); release_region(mss_port[dev], 4); - snd_card_free(dev_get_drvdata(devptr)); dev_set_drvdata(devptr, NULL); + snd_card_free(card); return 0; } -- cgit v0.10.2 From ea20d9293ce423a39717ed4375393129a2e701f9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 08:54:16 -0400 Subject: tracing: consolidate trace and trace_event headers Impact: clean up Neil Horman (et. al.) criticized the way the trace events were broken up into two files. The reason for that was that ftrace needed to separate out the declarations from where the #include was used. It then dawned on me that the tracepoint.h header only needs to define the TRACE_EVENT macro if it is not already defined. The solution is simply to test if TRACE_EVENT is defined, and if it is not then the linux/tracepoint.h header can define it. This change consolidates all the .h and _event_types.h into the .h file. Reported-by: Neil Horman Reported-by: Theodore Tso Reported-by: Jiaying Zhang Cc: Zhaolei Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Jason Baron Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index d35a7ee..4353f3f 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -31,6 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ +#ifndef DECLARE_TRACE + #define TP_PROTO(args...) args #define TP_ARGS(args...) args @@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } #endif /* CONFIG_TRACEPOINTS */ +#endif /* DECLARE_TRACE */ /* * Connect a probe to a tracepoint. @@ -154,10 +157,13 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args + +#ifndef TRACE_FORMAT #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif - +#ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: * @@ -262,5 +268,6 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#endif #endif diff --git a/include/trace/irq.h b/include/trace/irq.h index ff5d449..04ab4c6 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -1,9 +1,54 @@ -#ifndef _TRACE_IRQ_H +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IRQ_H -#include #include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); -#include +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); #endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h deleted file mode 100644 index 85964eb..0000000 --- a/include/trace/irq_event_types.h +++ /dev/null @@ -1,55 +0,0 @@ - -/* use instead */ -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#undef TRACE_SYSTEM diff --git a/include/trace/kmem.h b/include/trace/kmem.h index 46efc24..d7d1218 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -1,9 +1,192 @@ -#ifndef _TRACE_KMEM_H +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KMEM_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem -#endif /* _TRACE_KMEM_H */ +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +#endif diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 5ca67df..8ee7900 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -1,9 +1,57 @@ -#ifndef _TRACE_LOCKDEP_H +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_LOCKDEP_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); #endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h deleted file mode 100644 index 863f1e4..0000000 --- a/include/trace/lockdep_event_types.h +++ /dev/null @@ -1,57 +0,0 @@ - -#ifndef TRACE_FORMAT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lock - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#undef TRACE_SYSTEM diff --git a/include/trace/sched.h b/include/trace/sched.h index 4e372a1..5b1cf4a 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -1,9 +1,336 @@ -#ifndef _TRACE_SCHED_H +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched -#endif +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ diff --git a/include/trace/sched_event_types.h b/include/trace/sched_event_types.h deleted file mode 100644 index 63547dc..0000000 --- a/include/trace/sched_event_types.h +++ /dev/null @@ -1,337 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#undef TRACE_SYSTEM diff --git a/include/trace/skb.h b/include/trace/skb.h index d2de717..e6fd281 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -1,9 +1,37 @@ -#ifndef _TRACE_SKB_H_ -#define _TRACE_SKB_H_ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H #include #include -#include +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb -#endif +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ diff --git a/include/trace/skb_event_types.h b/include/trace/skb_event_types.h deleted file mode 100644 index 4a1c504..0000000 --- a/include/trace/skb_event_types.h +++ /dev/null @@ -1,38 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#undef TRACE_SYSTEM diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h deleted file mode 100644 index 552a50e..0000000 --- a/include/trace/trace_event_types.h +++ /dev/null @@ -1,7 +0,0 @@ -/* trace/_event_types.h here */ - -#include -#include -#include -#include -#include diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 246f2aa..5a35a91 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -8,6 +8,7 @@ #include "trace_output.h" +#define TRACE_HEADER_MULTI_READ #include "trace_events_stage_1.h" #include "trace_events_stage_2.h" #include "trace_events_stage_3.h" diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h index 38985f9..475f46a 100644 --- a/kernel/trace/trace_events_stage_1.h +++ b/kernel/trace/trace_events_stage_1.h @@ -1,7 +1,7 @@ /* * Stage 1 of the trace events. * - * Override the macros in to include the following: + * Override the macros in to include the following: * * struct ftrace_raw_ { * struct trace_entry ent; @@ -36,4 +36,4 @@ }; \ static struct ftrace_event_call event_##name -#include +#include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h index 59cfd7d..aa4a67a 100644 --- a/kernel/trace/trace_events_stage_2.h +++ b/kernel/trace/trace_events_stage_2.h @@ -1,7 +1,7 @@ /* * Stage 2 of the trace events. * - * Override the macros in to include the following: + * Override the macros in to include the following: * * enum print_line_t * ftrace_raw_output_(struct trace_iterator *iter, int flags) @@ -64,7 +64,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ return TRACE_TYPE_HANDLED; \ } -#include +#include /* * Setup the showing format of trace point. @@ -128,7 +128,7 @@ ftrace_format_##call(struct trace_seq *s) \ return ret; \ } -#include +#include #undef __field #define __field(type, item) \ @@ -167,4 +167,4 @@ ftrace_define_fields_##call(void) \ return ret; \ } -#include +#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h index 5bb1b7f..45c04e1 100644 --- a/kernel/trace/trace_events_stage_3.h +++ b/kernel/trace/trace_events_stage_3.h @@ -1,7 +1,7 @@ /* * Stage 3 of the trace events. * - * Override the macros in to include the following: + * Override the macros in to include the following: * * static void ftrace_event_(proto) * { @@ -272,7 +272,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ _TRACE_PROFILE_INIT(call) \ } -#include +#include #undef _TRACE_PROFILE #undef _TRACE_PROFILE_INIT -- cgit v0.10.2 From 78ddb08feb7d4fbe3c0a9931804c51ee58be4023 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 14 Apr 2009 16:53:05 +0200 Subject: wait: don't use __wake_up_common() '777c6c5 wait: prevent exclusive waiter starvation' made __wake_up_common() global to be used from abort_exclusive_wait(). It was needed to do a wake-up with the waitqueue lock held while passing down a key to the wake-up function. Since '4ede816 epoll keyed wakeups: add __wake_up_locked_key() and __wake_up_sync_key()' there is an appropriate wrapper for this case: __wake_up_locked_key(). Use it here and make __wake_up_common() private to the scheduler again. Signed-off-by: Johannes Weiner Cc: Andrew Morton Cc: Peter Zijlstra LKML-Reference: <1239720785-19661-1-git-send-email-hannes@cmpxchg.org> Signed-off-by: Ingo Molnar diff --git a/include/linux/wait.h b/include/linux/wait.h index 5d631c1..67d4e89 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, diff --git a/kernel/sched.c b/kernel/sched.c index 36d213b..92b4b56 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5345,7 +5345,7 @@ EXPORT_SYMBOL(default_wake_function); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. */ -void __wake_up_common(wait_queue_head_t *q, unsigned int mode, +static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync, void *key) { wait_queue_t *curr, *next; diff --git a/kernel/wait.c b/kernel/wait.c index 42a2dbc..ea7c3b4 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, if (!list_empty(&wait->task_list)) list_del_init(&wait->task_list); else if (waitqueue_active(q)) - __wake_up_common(q, mode, 1, 0, key); + __wake_up_locked_key(q, mode, key); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL(abort_exclusive_wait); -- cgit v0.10.2 From 56449f437add737a1e5e1cb7e00f63ac8ead1938 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 14 Apr 2009 11:24:36 +0200 Subject: tracing: make the trace clocks available generally Jeremy Fitzhardinge reported this build failure: LD .tmp_vmlinux1 arch/x86/kernel/built-in.o: In function `ds_take_timestamp': git/linux/arch/x86/kernel/ds.c:1380: undefined reference to `trace_clock_global' git/linux/arch/x86/kernel/ds.c:1380: undefined reference to `trace_clock_global' Which is due to !CONFIG_TRACING && CONFIG_X86_DS=y. Expose the trace clock code to CONFIG_X86_DS as well. [ Unfortunately librarizing doesnt work well - ancient architectures with no raw_local_irq_save() primitive break the build. ] Reported-by: Jeremy Fitzhardinge LKML-Reference: <49E4413F.7070700@goop.org> Signed-off-by: Ingo Molnar diff --git a/kernel/Makefile b/kernel/Makefile index bab1dff..c8e1be5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ +obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 2630f51..ecc671e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -15,11 +15,16 @@ ifdef CONFIG_TRACING_BRANCHES KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING endif +# +# Make the trace clocks available generally: it's infrastructure +# relied on by ptrace for example: +# +obj-y += trace_clock.o + obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_TRACING) += trace.o -obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o -- cgit v0.10.2 From 72c6a9870f901045f2464c3dc6ee8914bfdc07aa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Apr 2009 17:33:57 +0200 Subject: rculist.h: introduce list_entry_rcu() and list_first_entry_rcu() I've run into the situation where I need to use list_first_entry with rcu-guarded list. This patch introduces this. Also simplify list_for_each_entry_rcu() to use new list_entry_rcu() instead of list_entry(). Signed-off-by: Jiri Pirko Reviewed-by: Paul E. McKenney Cc: dipankar@in.ibm.com LKML-Reference: <20090414153356.GC3999@psychotron.englab.brq.redhat.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3..5710f43 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list, at->prev = last; } +/** + * list_entry_rcu - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_entry_rcu(ptr, type, member) \ + container_of(rcu_dereference(ptr), type, member) + +/** + * list_first_entry_rcu - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + * + * Note, that list is expected to be not empty. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_first_entry_rcu(ptr, type, member) \ + list_entry_rcu((ptr)->next, type, member) + #define __list_for_each_rcu(pos, head) \ for (pos = rcu_dereference((head)->next); \ pos != (head); \ @@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** -- cgit v0.10.2 From a8d154b009168337494fbf345671bab74d3e4b8b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 09:36:00 -0400 Subject: tracing: create automated trace defines This patch lowers the number of places a developer must modify to add new tracepoints. The current method to add a new tracepoint into an existing system is to write the trace point macro in the trace header with one of the macros TRACE_EVENT, TRACE_FORMAT or DECLARE_TRACE, then they must add the same named item into the C file with the macro DEFINE_TRACE(name) and then add the trace point. This change cuts out the needing to add the DEFINE_TRACE(name). Every file that uses the tracepoint must still include the trace/.h file, but the one C file must also add a define before the including of that file. #define CREATE_TRACE_POINTS #include This will cause the trace/mytrace.h file to also produce the C code necessary to implement the trace point. Note, if more than one trace/.h is used to create the C code it is best to list them all together. #define CREATE_TRACE_POINTS #include #include #include Thanks to Mathieu Desnoyers and Christoph Hellwig for coming up with the cleaner solution of the define above the includes over my first design to have the C code include a "special" header. This patch converts sched, irq and lockdep and skb to use this new method. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h new file mode 100644 index 0000000..de9dc7d --- /dev/null +++ b/include/trace/define_trace.h @@ -0,0 +1,75 @@ +/* + * Trace files that want to automate creationg of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. + */ + +#ifdef CREATE_TRACE_POINTS + +/* Prevent recursion */ +#undef CREATE_TRACE_POINTS + +#include + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + DEFINE_TRACE(name) + +#undef TRACE_FORMAT +#define TRACE_FORMAT(name, proto, args, print) \ + DEFINE_TRACE(name) + +#undef DECLARE_TRACE +#define DECLARE_TRACE(name, proto, args) \ + DEFINE_TRACE(name) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) +# define UNDEF_TRACE_INCLUDE_FILE +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_HEADER_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_HEADER_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +/* We may be processing more files */ +#define CREATE_TRACE_POINTS + +#endif /* CREATE_TRACE_POINTS */ diff --git a/include/trace/irq.h b/include/trace/irq.h index 04ab4c6..75e3468 100644 --- a/include/trace/irq.h +++ b/include/trace/irq.h @@ -51,4 +51,7 @@ TRACE_FORMAT(softirq_exit, TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) ); -#endif +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h index d7d1218..c22c42f 100644 --- a/include/trace/kmem.h +++ b/include/trace/kmem.h @@ -188,5 +188,7 @@ TRACE_EVENT(kmem_cache_free, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); +#endif /* _TRACE_KMEM_H */ -#endif +/* This part must be outside protection */ +#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 8ee7900..4d301e7 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -55,3 +55,6 @@ TRACE_EVENT(lock_acquired, #endif #endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/sched.h b/include/trace/sched.h index 5b1cf4a..ffa1cab 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -334,3 +334,6 @@ TRACE_EVENT(sched_signal_send, ); #endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/skb.h b/include/trace/skb.h index e6fd281..1e8fabb 100644 --- a/include/trace/skb.h +++ b/include/trace/skb.h @@ -35,3 +35,6 @@ TRACE_EVENT(kfree_skb, ); #endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/exit.c b/kernel/exit.c index abf9cf3..2fe9d2c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -56,10 +56,6 @@ #include #include "cred-internals.h" -DEFINE_TRACE(sched_process_free); -DEFINE_TRACE(sched_process_exit); -DEFINE_TRACE(sched_process_wait); - static void exit_mm(struct task_struct * tsk); static void __unhash_process(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd..4bebf26 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -83,8 +83,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ -DEFINE_TRACE(sched_process_fork); - int nr_processes(void) { int cpu; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d82142b..983d8be 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -17,9 +17,11 @@ #include #include #include -#include #include +#define CREATE_TRACE_POINTS +#include + #include "internals.h" /* @@ -348,9 +350,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) "but no thread function available.", irq, action->name); } -DEFINE_TRACE(irq_handler_entry); -DEFINE_TRACE(irq_handler_exit); - /** * handle_IRQ_event - irq action chain handler * @irq: the interrupt number diff --git a/kernel/kthread.c b/kernel/kthread.c index 4ebaf85..e1c7692 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; -DEFINE_TRACE(sched_kthread_stop); -DEFINE_TRACE(sched_kthread_stop_ret); - struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index c4582a6..257f21a 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -42,12 +42,14 @@ #include #include #include -#include #include #include "lockdep_internals.h" +#define CREATE_TRACE_POINTS +#include + #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; module_param(prove_locking, int, 0644); @@ -2929,8 +2931,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name, } EXPORT_SYMBOL_GPL(lock_set_class); -DEFINE_TRACE(lock_acquire); - /* * We are not always called with irqs disabled - do that here, * and also avoid lockdep recursion: @@ -2957,8 +2957,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, } EXPORT_SYMBOL_GPL(lock_acquire); -DEFINE_TRACE(lock_release); - void lock_release(struct lockdep_map *lock, int nested, unsigned long ip) { @@ -3061,8 +3059,6 @@ found_it: put_lock_stats(stats); } -DEFINE_TRACE(lock_acquired); - static void __lock_acquired(struct lockdep_map *lock, unsigned long ip) { @@ -3118,8 +3114,6 @@ found_it: lock->ip = ip; } -DEFINE_TRACE(lock_contended); - void lock_contended(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; diff --git a/kernel/sched.c b/kernel/sched.c index 5724508..e6d4518 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -72,13 +72,15 @@ #include #include #include -#include #include #include #include "sched_cpupri.h" +#define CREATE_TRACE_POINTS +#include + /* * Convert user-nice values [ -20 ... 0 ... 19 ] * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], @@ -118,12 +120,6 @@ */ #define RUNTIME_INF ((u64)~0ULL) -DEFINE_TRACE(sched_wait_task); -DEFINE_TRACE(sched_wakeup); -DEFINE_TRACE(sched_wakeup_new); -DEFINE_TRACE(sched_switch); -DEFINE_TRACE(sched_migrate_task); - #ifdef CONFIG_SMP static void double_rq_lock(struct rq *rq1, struct rq *rq2); diff --git a/kernel/signal.c b/kernel/signal.c index d803473..1d5703f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -41,8 +41,6 @@ static struct kmem_cache *sigqueue_cachep; -DEFINE_TRACE(sched_signal_send); - static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/kernel/softirq.c b/kernel/softirq.c index 2fecefa..a2d9b45 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -186,9 +186,6 @@ EXPORT_SYMBOL(local_bh_enable_ip); */ #define MAX_SOFTIRQ_RESTART 10 -DEFINE_TRACE(softirq_entry); -DEFINE_TRACE(softirq_exit); - asmlinkage void __do_softirq(void) { struct softirq_action *h; diff --git a/mm/util.c b/mm/util.c index 2599e83..0e74a22 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,9 +4,11 @@ #include #include #include -#include #include +#define CREATE_TRACE_POINTS +#include + /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate @@ -239,13 +241,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, EXPORT_SYMBOL_GPL(get_user_pages_fast); /* Tracepoints definitions. */ -DEFINE_TRACE(kmalloc); -DEFINE_TRACE(kmem_cache_alloc); -DEFINE_TRACE(kmalloc_node); -DEFINE_TRACE(kmem_cache_alloc_node); -DEFINE_TRACE(kfree); -DEFINE_TRACE(kmem_cache_free); - EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); diff --git a/net/core/net-traces.c b/net/core/net-traces.c index c8fb456..8017720 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -19,11 +19,11 @@ #include #include #include -#include #include #include +#define CREATE_TRACE_POINTS +#include -DEFINE_TRACE(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); -- cgit v0.10.2 From 9504504cbab29ecb694186b1c5b15d3579c43c51 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 11 Apr 2009 12:59:57 -0400 Subject: tracing: make trace_seq operations available for core kernel In the process to make TRACE_EVENT macro work for modules, the trace_seq operations must be available for core kernel code. These operations are quite useful and can be used for other implementations. The main idea is that we create a trace_seq handle that acts very much like the seq_file handle. struct trace_seq *s = kmalloc(sizeof(*s, GFP_KERNEL); trace_seq_init(s); trace_seq_printf(s, "some data %d\n", variable); printk("%s", s->buffer); The main use is to allow a top level function call several other functions that may store printf like data into the buffer. Then at the end, the top level function can process all the data with any method it would like to. It could be passed to userspace, output via printk or even use seq_file: trace_seq_to_user(s, ubuf, cnt); seq_puts(m, s->buffer); Signed-off-by: Steven Rostedt diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h new file mode 100644 index 0000000..28051da --- /dev/null +++ b/include/linux/trace_seq.h @@ -0,0 +1,89 @@ +#ifndef _LINUX_TRACE_SEQ_H +#define _LINUX_TRACE_SEQ_H + +/* + * Trace sequences are used to allow a function to call several other functions + * to create a string of data to use (up to a max of PAGE_SIZE. + */ + +struct trace_seq { + unsigned char buffer[PAGE_SIZE]; + unsigned int len; + unsigned int readpos; +}; + +static inline void +trace_seq_init(struct trace_seq *s) +{ + s->len = 0; + s->readpos = 0; +} + +/* + * Currently only defined when tracing is enabled. + */ +#ifdef CONFIG_TRACING +extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); +extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); +extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt); +extern int trace_seq_puts(struct trace_seq *s, const char *str); +extern int trace_seq_putc(struct trace_seq *s, unsigned char c); +extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); +extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len); +extern void *trace_seq_reserve(struct trace_seq *s, size_t len); +extern int trace_seq_path(struct trace_seq *s, struct path *path); + +#else /* CONFIG_TRACING */ +static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))) +{ + return 0; +} +static inline int +trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) +{ + return 0; +} + +static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s) +{ +} +static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, + size_t cnt) +{ + return 0; +} +static inline int trace_seq_puts(struct trace_seq *s, const char *str) +{ + return 0; +} +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c); +{ + return 0; +} +static inline int +trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len) +{ + return 0; +} +static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, + size_t len) +{ + return 0; +} +static inline void *trace_seq_reserve(struct trace_seq *s, size_t len) +{ + return NULL; +} +static inline int trace_seq_path(struct trace_seq *s, struct path *path) +{ + return 0; +} +#endif /* CONFIG_TRACING */ + +#endif /* _LINUX_TRACE_SEQ_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b05b6ac..1882846 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -12,6 +12,8 @@ #include #include +#include + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -423,19 +425,6 @@ struct tracer { struct tracer_stat *stats; }; -struct trace_seq { - unsigned char buffer[PAGE_SIZE]; - unsigned int len; - unsigned int readpos; -}; - -static inline void -trace_seq_init(struct trace_seq *s) -{ - s->len = 0; - s->readpos = 0; -} - #define TRACE_PIPE_ALL_CPU -1 diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 9163021..5c7cbfb 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -1,6 +1,7 @@ #ifndef __TRACE_EVENTS_H #define __TRACE_EVENTS_H +#include #include "trace.h" typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, @@ -20,24 +21,9 @@ trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter); -extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); - -extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern int -trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); extern int seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags); -extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, - size_t cnt); -extern int trace_seq_puts(struct trace_seq *s, const char *str); -extern int trace_seq_putc(struct trace_seq *s, unsigned char c); -extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len); -extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, - size_t len); -extern void *trace_seq_reserve(struct trace_seq *s, size_t len); -extern int trace_seq_path(struct trace_seq *s, struct path *path); extern int seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, unsigned long sym_flags); extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, -- cgit v0.10.2 From 97f2025153499faa17267a0d4e18c7afaf73f39d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Apr 2009 11:20:49 -0400 Subject: tracing/events: move declarations from trace directory to core include In preparation to allowing trace events to happen in modules, we need to move some of the local declarations in the kernel/trace directory into include/linux. This patch simply moves the declarations and performs no context changes. Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h new file mode 100644 index 0000000..496b76d --- /dev/null +++ b/include/linux/ftrace_event.h @@ -0,0 +1,146 @@ +#ifndef _LINUX_FTRACE_EVENT_H +#define _LINUX_FTRACE_EVENT_H + +#include +#include + + +struct trace_array; +struct tracer; + +/* + * The trace entry - the most basic unit of tracing. This is what + * is printed in the end as a single line in the trace output, such as: + * + * bash-15816 [01] 235.197585: idle_cpu <- irq_enter + */ +struct trace_entry { + unsigned char type; + unsigned char flags; + unsigned char preempt_count; + int pid; + int tgid; +}; + +/* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: + */ +struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + void *private; + int cpu_file; + struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; + + /* The below is zeroed out in pipe_read */ + struct trace_seq seq; + struct trace_entry *ent; + int cpu; + u64 ts; + + unsigned long iter_flags; + loff_t pos; + long idx; + + cpumask_var_t started; +}; + + +typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, + int flags); +struct trace_event { + struct hlist_node node; + int type; + trace_print_func trace; + trace_print_func raw; + trace_print_func hex; + trace_print_func binary; +}; + +extern int register_ftrace_event(struct trace_event *event); +extern int unregister_ftrace_event(struct trace_event *event); + +/* Return values for print_line callback */ +enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, + TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ + TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ +}; + + +struct ring_buffer_event * +trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, + unsigned long flags, int pc); +void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, + unsigned long flags, int pc); +void trace_current_buffer_discard_commit(struct ring_buffer_event *event); + +void tracing_record_cmdline(struct task_struct *tsk); + +struct ftrace_event_call { + char *name; + char *system; + struct dentry *dir; + int enabled; + int (*regfunc)(void); + void (*unregfunc)(void); + int id; + int (*raw_init)(void); + int (*show_format)(struct trace_seq *s); + int (*define_fields)(void); + struct list_head fields; + int n_preds; + struct filter_pred **preds; + +#ifdef CONFIG_EVENT_PROFILE + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); +#endif +}; + +#define MAX_FILTER_PRED 8 +#define MAX_FILTER_STR_VAL 128 + +extern int init_preds(struct ftrace_event_call *call); +extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_current_check_discard(struct ftrace_event_call *call, + void *rec, + struct ring_buffer_event *event); + +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size); + + +/* + * The double __builtin_constant_p is because gcc will give us an error + * if we try to allocate the static variable to fmt if it is not a + * constant. Even with the outer if statement optimizing out. + */ +#define event_trace_printk(ip, fmt, args...) \ +do { \ + __trace_printk_check_format(fmt, ##args); \ + tracing_record_cmdline(current); \ + if (__builtin_constant_p(fmt)) { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ + __trace_bprintk(ip, trace_printk_fmt, ##args); \ + } else \ + __trace_printk(ip, fmt, ##args); \ +} while (0) + +#define __common_field(type, item) \ + ret = trace_define_field(event_call, #type, "common_" #item, \ + offsetof(typeof(field.ent), item), \ + sizeof(field.ent.item)); \ + if (ret) \ + return ret; + +#endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1882846..6bcdf4a 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -13,6 +13,7 @@ #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -44,20 +45,6 @@ enum trace_type { }; /* - * The trace entry - the most basic unit of tracing. This is what - * is printed in the end as a single line in the trace output, such as: - * - * bash-15816 [01] 235.197585: idle_cpu <- irq_enter - */ -struct trace_entry { - unsigned char type; - unsigned char flags; - unsigned char preempt_count; - int pid; - int tgid; -}; - -/* * Function trace entry - function address and parent function addres: */ struct ftrace_entry { @@ -265,8 +252,6 @@ struct trace_array_cpu { char comm[TASK_COMM_LEN]; }; -struct trace_iterator; - /* * The trace array - an array of per-CPU trace arrays. This is the * highest level data structure that individual tracers deal with. @@ -341,15 +326,6 @@ extern void __ftrace_bad_type(void); __ftrace_bad_type(); \ } while (0) -/* Return values for print_line callback */ -enum print_line_t { - TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ - TRACE_TYPE_HANDLED = 1, - TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ - TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ -}; - - /* * An option specific to a tracer. This is a boolean value. * The bit is the bit index that sets its value on the @@ -428,31 +404,6 @@ struct tracer { #define TRACE_PIPE_ALL_CPU -1 -/* - * Trace iterator - used by printout routines who present trace - * results to users and which routines might sleep, etc: - */ -struct trace_iterator { - struct trace_array *tr; - struct tracer *trace; - void *private; - int cpu_file; - struct mutex mutex; - struct ring_buffer_iter *buffer_iter[NR_CPUS]; - - /* The below is zeroed out in pipe_read */ - struct trace_seq seq; - struct trace_entry *ent; - int cpu; - u64 ts; - - unsigned long iter_flags; - loff_t pos; - long idx; - - cpumask_var_t started; -}; - int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); @@ -479,15 +430,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr, struct ring_buffer_event *event, unsigned long flags, int pc); -struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, - unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, - unsigned long flags, int pc); -void trace_current_buffer_discard_commit(struct ring_buffer_event *event); - struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -510,7 +452,6 @@ void tracing_sched_switch_trace(struct trace_array *tr, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc); -void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, struct task_struct *wakee, @@ -790,28 +731,6 @@ struct ftrace_event_field { int size; }; -struct ftrace_event_call { - char *name; - char *system; - struct dentry *dir; - int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); - int id; - int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); - int (*define_fields)(void); - struct list_head fields; - int n_preds; - struct filter_pred **preds; - -#ifdef CONFIG_EVENT_PROFILE - atomic_t profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); -#endif -}; - struct event_subsystem { struct list_head list; const char *name; @@ -825,9 +744,6 @@ struct event_subsystem { (unsigned long)event < (unsigned long)__stop_ftrace_events; \ event++) -#define MAX_FILTER_PRED 8 -#define MAX_FILTER_STR_VAL 128 - struct filter_pred; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); @@ -845,9 +761,6 @@ struct filter_pred { int clear; }; -int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size); -extern int init_preds(struct ftrace_event_call *call); extern void filter_free_pred(struct filter_pred *pred); extern void filter_print_preds(struct filter_pred **preds, int n_preds, struct trace_seq *s); @@ -855,13 +768,9 @@ extern int filter_parse(char **pbuf, struct filter_pred *pred); extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); extern void filter_disable_preds(struct ftrace_event_call *call); -extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern void filter_free_subsystem_preds(struct event_subsystem *system); extern int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred); -extern int filter_current_check_discard(struct ftrace_event_call *call, - void *rec, - struct ring_buffer_event *event); static inline int filter_check_discard(struct ftrace_event_call *call, void *rec, @@ -876,14 +785,6 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } -#define __common_field(type, item) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ - if (ret) \ - return ret; - -void event_trace_printk(unsigned long ip, const char *fmt, ...); extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; @@ -895,25 +796,6 @@ extern struct ftrace_event_call __stop_ftrace_events[]; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement optimizing out. - */ -#define event_trace_printk(ip, fmt, args...) \ -do { \ - __trace_printk_check_format(fmt, ##args); \ - tracing_record_cmdline(current); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_bprintk(ip, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(ip, fmt, ##args); \ -} while (0) - #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ extern struct ftrace_event_call event_##call; diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 5c7cbfb..6e220a8 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h @@ -4,18 +4,6 @@ #include #include "trace.h" -typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, - int flags); - -struct trace_event { - struct hlist_node node; - int type; - trace_print_func trace; - trace_print_func raw; - trace_print_func hex; - trace_print_func binary; -}; - extern enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter); extern enum print_line_t @@ -33,8 +21,6 @@ extern int trace_print_context(struct trace_iterator *iter); extern int trace_print_lat_context(struct trace_iterator *iter); extern struct trace_event *ftrace_find_event(int type); -extern int register_ftrace_event(struct trace_event *event); -extern int unregister_ftrace_event(struct trace_event *event); extern enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags); -- cgit v0.10.2 From f42c85e74faa422cf0bc747ed808681145448f88 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 13 Apr 2009 12:25:37 -0400 Subject: tracing/events: move the ftrace event tracing code to core This patch moves the ftrace creation into include/trace/ftrace.h and simplifies the work of developers in adding new tracepoints. Just the act of creating the trace points in include/trace and including define_trace.h will create the events in the debugfs/tracing/events directory. This patch removes the need of include/trace/trace_events.h Signed-off-by: Steven Rostedt diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index de9dc7d..980eb66 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,6 +56,10 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#ifdef CONFIG_EVENT_TRACER +#include +#endif + #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h new file mode 100644 index 0000000..955b967 --- /dev/null +++ b/include/trace/ftrace.h @@ -0,0 +1,492 @@ +/* + * Stage 1 of the trace events. + * + * Override the macros in to include the following: + * + * struct ftrace_raw_ { + * struct trace_entry ent; + * ; + * []; + * [...] + * }; + * + * The is created by the __field(type, item) macro or + * the __array(type2, item2, len) macro. + * We simply do "type item;", and that will create the fields + * in the structure. + */ + +#include + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef __array +#define __array(type, item, len) type item[len]; + +#undef __field +#define __field(type, item) type item; + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 2 of the trace events. + * + * Override the macros in to include the following: + * + * enum print_line_t + * ftrace_raw_output_(struct trace_iterator *iter, int flags) + * { + * struct trace_seq *s = &iter->seq; + * struct ftrace_raw_ *field; <-- defined in stage 1 + * struct trace_entry *entry; + * int ret; + * + * entry = iter->ent; + * + * if (entry->type != event_.id) { + * WARN_ON_ONCE(1); + * return TRACE_TYPE_UNHANDLED; + * } + * + * field = (typeof(field))entry; + * + * ret = trace_seq_printf(s, "\n"); + * if (!ret) + * return TRACE_TYPE_PARTIAL_LINE; + * + * return TRACE_TYPE_HANDLED; + * } + * + * This is the method used to print the raw event to the trace + * output format. Note, this is not needed if the data is read + * in binary. + */ + +#undef __entry +#define __entry field + +#undef TP_printk +#define TP_printk(fmt, args...) fmt "\n", args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##call *field; \ + struct trace_entry *entry; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + ret = trace_seq_printf(s, #call ": " print); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " offset:%u; size:%u;\n", + * offsetof(struct ftrace_raw_##call, item), + * sizeof(field.type)); + * + * } + */ + +#undef TP_STRUCT__entry +#define TP_STRUCT__entry(args...) args + +#undef __field +#define __field(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __array +#define __array(type, item, len) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ + "offset:%u;\tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), item), \ + (unsigned int)sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef __entry +#define __entry REC + +#undef TP_printk +#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef __field +#define __field(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef __array +#define __array(type, item, len) \ + BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ + ret = trace_define_field(event_call, #type "[" #len "]", #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (ret) \ + return ret; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +int \ +ftrace_define_fields_##call(void) \ +{ \ + struct ftrace_raw_##call field; \ + struct ftrace_event_call *event_call = &event_##call; \ + int ret; \ + \ + __common_field(unsigned char, type); \ + __common_field(unsigned char, flags); \ + __common_field(unsigned char, preempt_count); \ + __common_field(int, pid); \ + __common_field(int, tgid); \ + \ + tstruct; \ + \ + return ret; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * + * Override the macros in to include the following: + * + * static void ftrace_event_(proto) + * { + * event_trace_printk(_RET_IP_, ": " ); + * } + * + * static int ftrace_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_event_); + * } + * + * For those macros defined with TRACE_FORMAT: + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * } + * + * + * For those macros defined with TRACE_EVENT: + * + * static struct ftrace_event_call event_; + * + * static void ftrace_raw_event_(proto) + * { + * struct ring_buffer_event *event; + * struct ftrace_raw_ *entry; <-- defined in stage 1 + * unsigned long irq_flags; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * event = trace_current_buffer_lock_reserve(event_.id, + * sizeof(struct ftrace_raw_), + * irq_flags, pc); + * if (!event) + * return; + * entry = ring_buffer_event_data(event); + * + * ; <-- Here we assign the entries by the __field and + * __array macros. + * + * trace_current_buffer_unlock_commit(event, irq_flags, pc); + * } + * + * static int ftrace_raw_reg_event_(void) + * { + * int ret; + * + * ret = register_trace_(ftrace_raw_event_); + * if (!ret) + * pr_info("event trace: Could not activate trace point " + * "probe to "); + * return ret; + * } + * + * static void ftrace_unreg_event_(void) + * { + * unregister_trace_(ftrace_raw_event_); + * } + * + * static struct trace_event ftrace_event_type_ = { + * .trace = ftrace_raw_output_, <-- stage 2 + * }; + * + * static int ftrace_raw_init_event_(void) + * { + * int id; + * + * id = register_ftrace_event(&ftrace_event_type_); + * if (!id) + * return -ENODEV; + * event_.id = id; + * return 0; + * } + * + * static struct ftrace_event_call __used + * __attribute__((__aligned__(4))) + * __attribute__((section("_ftrace_events"))) event_ = { + * .name = "", + * .system = "", + * .raw_init = ftrace_raw_init_event_, + * .regfunc = ftrace_reg_event_, + * .unregfunc = ftrace_unreg_event_, + * .show_format = ftrace_format_, + * } + * + */ + +#undef TP_FMT +#define TP_FMT(fmt, args...) fmt "\n", ##args + +#ifdef CONFIG_EVENT_PROFILE +#define _TRACE_PROFILE(call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int); \ + perf_tpcounter_event(event_##call.id); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ +{ \ + if (atomic_add_negative(-1, &call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#define _TRACE_PROFILE_INIT(call) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = ftrace_profile_enable_##call, \ + .profile_disable = ftrace_profile_disable_##call, + +#else +#define _TRACE_PROFILE(call, proto, args) +#define _TRACE_PROFILE_INIT(call) +#endif + +#define _TRACE_FORMAT(call, proto, args, fmt) \ +static void ftrace_event_##call(proto) \ +{ \ + event_trace_printk(_RET_IP_, #call ": " fmt); \ +} \ + \ +static int ftrace_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_event_##call); \ +} \ + \ +static struct ftrace_event_call event_##call; \ + \ +static int ftrace_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(NULL); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + return 0; \ +} + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_init_event_##call, \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#undef __entry +#define __entry entry + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ + \ +static struct ftrace_event_call event_##call; \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + struct ftrace_event_call *call = &event_##call; \ + struct ring_buffer_event *event; \ + struct ftrace_raw_##call *entry; \ + unsigned long irq_flags; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + event = trace_current_buffer_lock_reserve(event_##call.id, \ + sizeof(struct ftrace_raw_##call), \ + irq_flags, pc); \ + if (!event) \ + return; \ + entry = ring_buffer_event_data(event); \ + \ + assign; \ + \ + if (!filter_current_check_discard(call, entry, event)) \ + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ +} \ + \ +static int ftrace_raw_reg_event_##call(void) \ +{ \ + int ret; \ + \ + ret = register_trace_##call(ftrace_raw_event_##call); \ + if (ret) \ + pr_info("event trace: Could not activate trace point " \ + "probe to " #call "\n"); \ + return ret; \ +} \ + \ +static void ftrace_raw_unreg_event_##call(void) \ +{ \ + unregister_trace_##call(ftrace_raw_event_##call); \ +} \ + \ +static struct trace_event ftrace_event_type_##call = { \ + .trace = ftrace_raw_output_##call, \ +}; \ + \ +static int ftrace_raw_init_event_##call(void) \ +{ \ + int id; \ + \ + id = register_ftrace_event(&ftrace_event_type_##call); \ + if (!id) \ + return -ENODEV; \ + event_##call.id = id; \ + INIT_LIST_HEAD(&event_##call.fields); \ + init_preds(&event_##call); \ + return 0; \ +} \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##call, \ + _TRACE_PROFILE_INIT(call) \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef _TRACE_PROFILE +#undef _TRACE_PROFILE_INIT + diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h deleted file mode 100644 index 13d6b85..0000000 --- a/include/trace/trace_events.h +++ /dev/null @@ -1,7 +0,0 @@ -/* trace/.h here */ - -#include -#include -#include -#include -#include diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 3ad367e..fb9d7f9 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -41,7 +41,6 @@ obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o obj-$(CONFIG_EVENT_TRACING) += trace_events.o -obj-$(CONFIG_EVENT_TRACER) += events.o obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o diff --git a/kernel/trace/events.c b/kernel/trace/events.c deleted file mode 100644 index 5a35a91..0000000 --- a/kernel/trace/events.c +++ /dev/null @@ -1,15 +0,0 @@ -/* - * This is the place to register all trace points as events. - */ - -#include - -#include - -#include "trace_output.h" - -#define TRACE_HEADER_MULTI_READ -#include "trace_events_stage_1.h" -#include "trace_events_stage_2.h" -#include "trace_events_stage_3.h" - diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h deleted file mode 100644 index 475f46a..0000000 --- a/kernel/trace/trace_events_stage_1.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Stage 1 of the trace events. - * - * Override the macros in to include the following: - * - * struct ftrace_raw_ { - * struct trace_entry ent; - * ; - * []; - * [...] - * }; - * - * The is created by the __field(type, item) macro or - * the __array(type2, item2, len) macro. - * We simply do "type item;", and that will create the fields - * in the structure. - */ - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - -#undef __array -#define __array(type, item, len) type item[len]; - -#undef __field -#define __field(type, item) type item; - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name - -#include diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h deleted file mode 100644 index aa4a67a..0000000 --- a/kernel/trace/trace_events_stage_2.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Stage 2 of the trace events. - * - * Override the macros in to include the following: - * - * enum print_line_t - * ftrace_raw_output_(struct trace_iterator *iter, int flags) - * { - * struct trace_seq *s = &iter->seq; - * struct ftrace_raw_ *field; <-- defined in stage 1 - * struct trace_entry *entry; - * int ret; - * - * entry = iter->ent; - * - * if (entry->type != event_.id) { - * WARN_ON_ONCE(1); - * return TRACE_TYPE_UNHANDLED; - * } - * - * field = (typeof(field))entry; - * - * ret = trace_seq_printf(s, "\n"); - * if (!ret) - * return TRACE_TYPE_PARTIAL_LINE; - * - * return TRACE_TYPE_HANDLED; - * } - * - * This is the method used to print the raw event to the trace - * output format. Note, this is not needed if the data is read - * in binary. - */ - -#undef __entry -#define __entry field - -#undef TP_printk -#define TP_printk(fmt, args...) fmt "\n", args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ -{ \ - struct trace_seq *s = &iter->seq; \ - struct ftrace_raw_##call *field; \ - struct trace_entry *entry; \ - int ret; \ - \ - entry = iter->ent; \ - \ - if (entry->type != event_##call.id) { \ - WARN_ON_ONCE(1); \ - return TRACE_TYPE_UNHANDLED; \ - } \ - \ - field = (typeof(field))entry; \ - \ - ret = trace_seq_printf(s, #call ": " print); \ - if (!ret) \ - return TRACE_TYPE_PARTIAL_LINE; \ - \ - return TRACE_TYPE_HANDLED; \ -} - -#include - -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __entry -#define __entry REC - -#undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field; \ - int ret; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include - -#undef __field -#define __field(type, item) \ - ret = trace_define_field(event_call, #type, #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (ret) \ - return ret; - -#undef __array -#define __array(type, item, len) \ - BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - ret = trace_define_field(event_call, #type "[" #len "]", #item, \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ - if (ret) \ - return ret; - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -int \ -ftrace_define_fields_##call(void) \ -{ \ - struct ftrace_raw_##call field; \ - struct ftrace_event_call *event_call = &event_##call; \ - int ret; \ - \ - __common_field(unsigned char, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ - \ - tstruct; \ - \ - return ret; \ -} - -#include diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h deleted file mode 100644 index 45c04e1..0000000 --- a/kernel/trace/trace_events_stage_3.h +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Stage 3 of the trace events. - * - * Override the macros in to include the following: - * - * static void ftrace_event_(proto) - * { - * event_trace_printk(_RET_IP_, ": " ); - * } - * - * static int ftrace_reg_event_(void) - * { - * int ret; - * - * ret = register_trace_(ftrace_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; - * } - * - * static void ftrace_unreg_event_(void) - * { - * unregister_trace_(ftrace_event_); - * } - * - * For those macros defined with TRACE_FORMAT: - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, - * } - * - * - * For those macros defined with TRACE_EVENT: - * - * static struct ftrace_event_call event_; - * - * static void ftrace_raw_event_(proto) - * { - * struct ring_buffer_event *event; - * struct ftrace_raw_ *entry; <-- defined in stage 1 - * unsigned long irq_flags; - * int pc; - * - * local_save_flags(irq_flags); - * pc = preempt_count(); - * - * event = trace_current_buffer_lock_reserve(event_.id, - * sizeof(struct ftrace_raw_), - * irq_flags, pc); - * if (!event) - * return; - * entry = ring_buffer_event_data(event); - * - * ; <-- Here we assign the entries by the __field and - * __array macros. - * - * trace_current_buffer_unlock_commit(event, irq_flags, pc); - * } - * - * static int ftrace_raw_reg_event_(void) - * { - * int ret; - * - * ret = register_trace_(ftrace_raw_event_); - * if (!ret) - * pr_info("event trace: Could not activate trace point " - * "probe to "); - * return ret; - * } - * - * static void ftrace_unreg_event_(void) - * { - * unregister_trace_(ftrace_raw_event_); - * } - * - * static struct trace_event ftrace_event_type_ = { - * .trace = ftrace_raw_output_, <-- stage 2 - * }; - * - * static int ftrace_raw_init_event_(void) - * { - * int id; - * - * id = register_ftrace_event(&ftrace_event_type_); - * if (!id) - * return -ENODEV; - * event_.id = id; - * return 0; - * } - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .system = "", - * .raw_init = ftrace_raw_init_event_, - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, - * .show_format = ftrace_format_, - * } - * - */ - -#undef TP_FMT -#define TP_FMT(fmt, args...) fmt "\n", ##args - -#ifdef CONFIG_EVENT_PROFILE -#define _TRACE_PROFILE(call, proto, args) \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int); \ - perf_tpcounter_event(event_##call.id); \ -} \ - \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ -{ \ - int ret = 0; \ - \ - if (!atomic_inc_return(&call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ -} \ - \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ -{ \ - if (atomic_add_negative(-1, &call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ -} - -#define _TRACE_PROFILE_INIT(call) \ - .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = ftrace_profile_enable_##call, \ - .profile_disable = ftrace_profile_disable_##call, - -#else -#define _TRACE_PROFILE(call, proto, args) -#define _TRACE_PROFILE_INIT(call) -#endif - -#define _TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, #call ": " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call event_##call; \ - \ -static int ftrace_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(NULL); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - return 0; \ -} - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_init_event_##call, \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - -#undef __entry -#define __entry entry - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ - \ -static struct ftrace_event_call event_##call; \ - \ -static void ftrace_raw_event_##call(proto) \ -{ \ - struct ftrace_event_call *call = &event_##call; \ - struct ring_buffer_event *event; \ - struct ftrace_raw_##call *entry; \ - unsigned long irq_flags; \ - int pc; \ - \ - local_save_flags(irq_flags); \ - pc = preempt_count(); \ - \ - event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ - irq_flags, pc); \ - if (!event) \ - return; \ - entry = ring_buffer_event_data(event); \ - \ - assign; \ - \ - if (!filter_current_check_discard(call, entry, event)) \ - trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ -} \ - \ -static int ftrace_raw_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_raw_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_raw_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_raw_event_##call); \ -} \ - \ -static struct trace_event ftrace_event_type_##call = { \ - .trace = ftrace_raw_output_##call, \ -}; \ - \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(&ftrace_event_type_##call); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ - return 0; \ -} \ - \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ - .regfunc = ftrace_raw_reg_event_##call, \ - .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - -#include - -#undef _TRACE_PROFILE -#undef _TRACE_PROFILE_INIT - -- cgit v0.10.2 From a59fd6027218bd7c994e39d14afe0242f895144f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 13:52:20 -0400 Subject: tracing/events: convert event call sites to use a link list Impact: makes it possible to define events in modules The events are created by reading down the section that they are linked in by the macros. But this is not scalable to modules. This patch converts the manipulations to use a global link list, and on boot up it adds the items in the section to the list. This change will allow modules to add their tracing events to the list as well. Note, this change alone does not permit modules to use the TRACE_EVENT macros, but the change is needed for them to eventually do so. Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 496b76d..1781085 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -83,6 +83,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); struct ftrace_event_call { + struct list_head list; char *name; char *system; struct dentry *dir; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6bcdf4a..8817c18 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -739,11 +739,6 @@ struct event_subsystem { struct filter_pred **preds; }; -#define events_for_each(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) - struct filter_pred; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); @@ -785,13 +780,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } -extern struct ftrace_event_call __start_ftrace_events[]; -extern struct ftrace_event_call __stop_ftrace_events[]; - -#define for_each_event(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) +extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 199de9c..7bf2ad6 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -11,7 +11,7 @@ int ftrace_profile_enable(int event_id) { struct ftrace_event_call *event; - for_each_event(event) { + list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) return event->profile_enable(event); } @@ -23,7 +23,7 @@ void ftrace_profile_disable(int event_id) { struct ftrace_event_call *event; - for_each_event(event) { + list_for_each_entry(event, &ftrace_events, list) { if (event->id == event_id) return event->profile_disable(event); } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ead68ac..5c66aaf 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -19,6 +19,8 @@ static DEFINE_MUTEX(event_mutex); +LIST_HEAD(ftrace_events); + int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size) { @@ -54,16 +56,14 @@ err: static void ftrace_clear_events(void) { - struct ftrace_event_call *call = (void *)__start_ftrace_events; - + struct ftrace_event_call *call; - while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { + list_for_each_entry(call, &ftrace_events, list) { if (call->enabled) { call->enabled = 0; call->unregfunc(); } - call++; } } @@ -89,7 +89,7 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, static int ftrace_set_clr_event(char *buf, int set) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; char *event = NULL, *sub = NULL, *match; int ret = -EINVAL; @@ -118,7 +118,7 @@ static int ftrace_set_clr_event(char *buf, int set) } mutex_lock(&event_mutex); - for_each_event(call) { + list_for_each_entry(call, &ftrace_events, list) { if (!call->name || !call->regfunc) continue; @@ -224,15 +224,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf, static void * t_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; - struct ftrace_event_call *next = call; + struct list_head *list = m->private; + struct ftrace_event_call *call; (*pos)++; for (;;) { - if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + if (list == &ftrace_events) return NULL; + call = list_entry(list, struct ftrace_event_call, list); + /* * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. @@ -240,11 +242,10 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if (call->regfunc) break; - call++; - next = call; + list = list->next; } - m->private = ++next; + m->private = list->next; return call; } @@ -257,22 +258,23 @@ static void *t_start(struct seq_file *m, loff_t *pos) static void * s_next(struct seq_file *m, void *v, loff_t *pos) { - struct ftrace_event_call *call = m->private; - struct ftrace_event_call *next; + struct list_head *list = m->private; + struct ftrace_event_call *call; (*pos)++; retry: - if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) + if (list == &ftrace_events) return NULL; + call = list_entry(list, struct ftrace_event_call, list); + if (!call->enabled) { - call++; + list = list->next; goto retry; } - next = call; - m->private = ++next; + m->private = list->next; return call; } @@ -312,7 +314,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; - m->private = __start_ftrace_events; + m->private = ftrace_events.next; } return ret; } @@ -797,9 +799,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return 0; } +extern struct ftrace_event_call __start_ftrace_events[]; +extern struct ftrace_event_call __stop_ftrace_events[]; + +#define for_each_event(event) \ + for (event = __start_ftrace_events; \ + (unsigned long)event < (unsigned long)__stop_ftrace_events; \ + event++) + static __init int event_trace_init(void) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; @@ -830,6 +840,7 @@ static __init int event_trace_init(void) /* The linker may leave blanks */ if (!call->name) continue; + list_add(&call->list, &ftrace_events); event_create_dir(call, d_events); } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index de42dad..d30b06b 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -223,7 +223,7 @@ oom: void filter_free_subsystem_preds(struct event_subsystem *system) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; int i; if (system->n_preds) { @@ -234,7 +234,7 @@ void filter_free_subsystem_preds(struct event_subsystem *system) system->n_preds = 0; } - events_for_each(call) { + list_for_each_entry(call, &ftrace_events, list) { if (!call->define_fields) continue; @@ -320,7 +320,7 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred) { - struct ftrace_event_call *call = __start_ftrace_events; + struct ftrace_event_call *call; if (system->n_preds && !pred->compound) filter_free_subsystem_preds(system); @@ -337,7 +337,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system, system->preds[system->n_preds] = pred; - events_for_each(call) { + list_for_each_entry(call, &ftrace_events, list) { int err; if (!call->define_fields) -- cgit v0.10.2 From 17c873ec280a03894bc718af817f7f24fa787ae1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 18:12:50 -0400 Subject: tracing/events: add export symbols for trace events in modules Impact: let modules add trace events The trace event code requires some functions to be exported to allow modules to use TRACE_EVENT. This patch adds EXPORT_SYMBOL_GPL to the necessary functions. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c0047fc..2d69b26 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -176,6 +176,7 @@ int filter_current_check_discard(struct ftrace_event_call *call, void *rec, { return filter_check_discard(call, rec, global_trace.buffer, event); } +EXPORT_SYMBOL_GPL(filter_current_check_discard); cycle_t ftrace_now(int cpu) { @@ -886,6 +887,7 @@ trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, return trace_buffer_lock_reserve(&global_trace, type, len, flags, pc); } +EXPORT_SYMBOL(trace_current_buffer_lock_reserve); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) @@ -903,6 +905,7 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event) { ring_buffer_discard_commit(global_trace.buffer, event); } +EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); void trace_function(struct trace_array *tr, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5c66aaf..8b9e621 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -53,6 +53,7 @@ err: return -ENOMEM; } +EXPORT_SYMBOL_GPL(trace_define_field); static void ftrace_clear_events(void) { diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index d30b06b..f8e5eab 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -110,6 +110,7 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) return 1; } +EXPORT_SYMBOL_GPL(filter_match_preds); void filter_print_preds(struct filter_pred **preds, int n_preds, struct trace_seq *s) @@ -220,6 +221,7 @@ oom: return -ENOMEM; } +EXPORT_SYMBOL_GPL(init_preds); void filter_free_subsystem_preds(struct event_subsystem *system) { diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 0e70fb0..83a8abb 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -94,6 +94,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) return len; } +EXPORT_SYMBOL_GPL(trace_seq_printf); int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) { @@ -538,6 +539,7 @@ int register_ftrace_event(struct trace_event *event) return ret; } +EXPORT_SYMBOL_GPL(register_ftrace_event); /** * unregister_ftrace_event - remove a no longer used event @@ -551,6 +553,7 @@ int unregister_ftrace_event(struct trace_event *event) return 0; } +EXPORT_SYMBOL_GPL(unregister_ftrace_event); /* * Standard events -- cgit v0.10.2 From 6d723736e472f7a0cd5b62c84152fceead241328 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 Apr 2009 14:53:50 -0400 Subject: tracing/events: add support for modules to TRACE_EVENT Impact: allow modules to add TRACE_EVENTS on load This patch adds the final hooks to allow modules to use the TRACE_EVENT macro. A notifier and a data structure are used to link the TRACE_EVENTs defined in the module to connect them with the ftrace event tracing system. It also adds the necessary automated clean ups to the trace events when a module is removed. Cc: Rusty Russell Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 1781085..75f3ac0 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -7,6 +7,7 @@ struct trace_array; struct tracer; +struct dentry; /* * The trace entry - the most basic unit of tracing. This is what @@ -87,6 +88,7 @@ struct ftrace_event_call { char *name; char *system; struct dentry *dir; + struct trace_event *event; int enabled; int (*regfunc)(void); void (*unregfunc)(void); @@ -97,6 +99,7 @@ struct ftrace_event_call { struct list_head fields; int n_preds; struct filter_pred **preds; + void *mod; #ifdef CONFIG_EVENT_PROFILE atomic_t profile_count; diff --git a/include/linux/module.h b/include/linux/module.h index 627ac08..6155fa4 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -337,6 +337,10 @@ struct module const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; #endif +#ifdef CONFIG_EVENT_TRACING + struct ftrace_event_call *trace_events; + unsigned int num_trace_events; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 28051da..15ca2c7 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TRACE_SEQ_H #define _LINUX_TRACE_SEQ_H +#include + /* * Trace sequences are used to allow a function to call several other functions * to create a string of data to use (up to a max of PAGE_SIZE. diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 955b967..60c5323 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -477,6 +477,7 @@ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ diff --git a/kernel/module.c b/kernel/module.c index e797812..a039470 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include @@ -2172,6 +2173,12 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif +#ifdef CONFIG_EVENT_TRACING + mod->trace_events = section_objs(hdr, sechdrs, secstrings, + "_ftrace_events", + sizeof(*mod->trace_events), + &mod->num_trace_events); +#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8b9e621..a4b1777 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -713,7 +713,13 @@ event_subsystem_dir(const char *name, struct dentry *d_events) return d_events; } - system->name = name; + system->name = kstrdup(name, GFP_KERNEL); + if (!system->name) { + debugfs_remove(system->entry); + kfree(system); + return d_events; + } + list_add(&system->list, &event_subsystems); system->preds = NULL; @@ -738,7 +744,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) * If the trace point header did not define TRACE_SYSTEM * then the system would be called "TRACE_SYSTEM". */ - if (strcmp(call->system, "TRACE_SYSTEM") != 0) + if (strcmp(call->system, TRACE_SYSTEM) != 0) d_events = event_subsystem_dir(call->system, d_events); if (call->raw_init) { @@ -757,21 +763,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return -1; } - if (call->regfunc) { - entry = debugfs_create_file("enable", 0644, call->dir, call, - &ftrace_enable_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/enable' entry\n", call->name); - } + if (call->regfunc) + entry = trace_create_file("enable", 0644, call->dir, call, + &ftrace_enable_fops); - if (call->id) { - entry = debugfs_create_file("id", 0444, call->dir, call, - &ftrace_event_id_fops); - if (!entry) - pr_warning("Could not create debugfs '%s/id' entry\n", - call->name); - } + if (call->id) + entry = trace_create_file("id", 0444, call->dir, call, + &ftrace_event_id_fops); if (call->define_fields) { ret = call->define_fields(); @@ -780,40 +778,102 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) " events/%s\n", call->name); return ret; } - entry = debugfs_create_file("filter", 0644, call->dir, call, - &ftrace_event_filter_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/filter' entry\n", call->name); + entry = trace_create_file("filter", 0644, call->dir, call, + &ftrace_event_filter_fops); } /* A trace may not want to export its format */ if (!call->show_format) return 0; - entry = debugfs_create_file("format", 0444, call->dir, call, - &ftrace_event_format_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'%s/format' entry\n", call->name); + entry = trace_create_file("format", 0444, call->dir, call, + &ftrace_event_format_fops); + + return 0; +} + +#define for_each_event(event, start, end) \ + for (event = start; \ + (unsigned long)event < (unsigned long)end; \ + event++) + +static void trace_module_add_events(struct module *mod) +{ + struct ftrace_event_call *call, *start, *end; + struct dentry *d_events; + + start = mod->trace_events; + end = mod->trace_events + mod->num_trace_events; + + if (start == end) + return; + + d_events = event_trace_events_dir(); + if (!d_events) + return; + + for_each_event(call, start, end) { + /* The linker may leave blanks */ + if (!call->name) + continue; + call->mod = mod; + list_add(&call->list, &ftrace_events); + event_create_dir(call, d_events); + } +} + +static void trace_module_remove_events(struct module *mod) +{ + struct ftrace_event_call *call, *p; + + list_for_each_entry_safe(call, p, &ftrace_events, list) { + if (call->mod == mod) { + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + } + if (call->event) + unregister_ftrace_event(call->event); + debugfs_remove_recursive(call->dir); + list_del(&call->list); + } + } +} + +int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + mutex_lock(&event_mutex); + switch (val) { + case MODULE_STATE_COMING: + trace_module_add_events(mod); + break; + case MODULE_STATE_GOING: + trace_module_remove_events(mod); + break; + } + mutex_unlock(&event_mutex); return 0; } +struct notifier_block trace_module_nb = { + .notifier_call = trace_module_notify, + .priority = 0, +}; + extern struct ftrace_event_call __start_ftrace_events[]; extern struct ftrace_event_call __stop_ftrace_events[]; -#define for_each_event(event) \ - for (event = __start_ftrace_events; \ - (unsigned long)event < (unsigned long)__stop_ftrace_events; \ - event++) - static __init int event_trace_init(void) { struct ftrace_event_call *call; struct dentry *d_tracer; struct dentry *entry; struct dentry *d_events; + int ret; d_tracer = tracing_init_dentry(); if (!d_tracer) @@ -837,7 +897,7 @@ static __init int event_trace_init(void) if (!d_events) return 0; - for_each_event(call) { + for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { /* The linker may leave blanks */ if (!call->name) continue; @@ -845,6 +905,10 @@ static __init int event_trace_init(void) event_create_dir(call, d_events); } + ret = register_module_notifier(&trace_module_nb); + if (!ret) + pr_warning("Failed to register trace events module notifier\n"); + return 0; } fs_initcall(event_trace_init); -- cgit v0.10.2 From 19e4529ee7345079eeacc8e40cf69a304a64dc23 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 14 Apr 2009 17:27:18 +1000 Subject: modules: Fix up build when CONFIG_MODULE_UNLOAD=n. Commit 3d43321b7015387cfebbe26436d0e9d299162ea1 ("modules: sysctl to block module loading") introduces a modules_disabled variable that is only defined if CONFIG_MODULE_UNLOAD is enabled, despite being used in other places. This moves it up and fixes up the build. CC kernel/module.o kernel/module.c: In function 'sys_init_module': kernel/module.c:2401: error: 'modules_disabled' undeclared (first use in this function) kernel/module.c:2401: error: (Each undeclared identifier is reported only once kernel/module.c:2401: error: for each function it appears in.) make[1]: *** [kernel/module.o] Error 1 make: *** [kernel/module.o] Error 2 Signed-off-by: Paul Mundt Signed-off-by: James Morris diff --git a/kernel/module.c b/kernel/module.c index eeb3f7b..ee7ab61 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -71,6 +71,9 @@ static DEFINE_MUTEX(module_mutex); static LIST_HEAD(modules); +/* Block module loading/unloading? */ +int modules_disabled = 0; + /* Waiting for a module to finish initializing? */ static DECLARE_WAIT_QUEUE_HEAD(module_wq); @@ -778,9 +781,6 @@ static void wait_for_zero_refcount(struct module *mod) mutex_lock(&module_mutex); } -/* Block module loading/unloading? */ -int modules_disabled = 0; - SYSCALL_DEFINE2(delete_module, const char __user *, name_user, unsigned int, flags) { -- cgit v0.10.2 From 61f919a12fbdc3fd20f980a34a118d597198a392 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 18:22:32 -0400 Subject: tracing/events: fix compile for modules disabled Impact: compile fix The addition of TRACE_EVENT for modules breaks the build for when modules are disabled. This code fixes that. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a4b1777..6591d83 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -797,6 +797,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) (unsigned long)event < (unsigned long)end; \ event++) +#ifdef CONFIG_MODULES static void trace_module_add_events(struct module *mod) { struct ftrace_event_call *call, *start, *end; @@ -840,8 +841,8 @@ static void trace_module_remove_events(struct module *mod) } } -int trace_module_notify(struct notifier_block *self, - unsigned long val, void *data) +static int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) { struct module *mod = data; @@ -858,6 +859,13 @@ int trace_module_notify(struct notifier_block *self, return 0; } +#else +static int trace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ struct notifier_block trace_module_nb = { .notifier_call = trace_module_notify, -- cgit v0.10.2 From ecda8ae02a08ef065ff387f5cb2a2d4999da2408 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 18:49:38 -0400 Subject: tracing/events: fix lockdep system name Impact: fix compile error of lockdep event tracer Ingo Molnar pointed out that the system name for the lockdep tracer was "lock" which is used to include the event trace file name. It should be "lockdep" Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h index 4d301e7..45e326b 100644 --- a/include/trace/lockdep.h +++ b/include/trace/lockdep.h @@ -5,7 +5,7 @@ #include #undef TRACE_SYSTEM -#define TRACE_SYSTEM lock +#define TRACE_SYSTEM lockdep #ifdef CONFIG_LOCKDEP -- cgit v0.10.2 From ad8d75fff811a6a230f7f43b05a6483099349533 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 19:39:12 -0400 Subject: tracing/events: move trace point headers into include/trace/events Impact: clean up Create a sub directory in include/trace called events to keep the trace point headers in their own separate directory. Only headers that declare trace points should be defined in this directory. Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Neil Horman Cc: Zhao Lei Cc: Eduard - Gabriel Munteanu Cc: Pekka Enberg Signed-off-by: Steven Rostedt diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h index 15c45a2..b616d39 100644 --- a/include/linux/kmemtrace.h +++ b/include/linux/kmemtrace.h @@ -9,7 +9,7 @@ #ifdef __KERNEL__ -#include +#include #ifdef CONFIG_KMEMTRACE extern void kmemtrace_init(void); diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 980eb66..1886941 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -43,7 +43,7 @@ #endif #ifndef TRACE_INCLUDE_PATH -# define __TRACE_INCLUDE(system) +# define __TRACE_INCLUDE(system) # define UNDEF_TRACE_INCLUDE_FILE #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h new file mode 100644 index 0000000..75e3468 --- /dev/null +++ b/include/trace/events/irq.h @@ -0,0 +1,57 @@ +#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IRQ_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + +/* + * Tracepoint for entry of interrupt handler: + */ +TRACE_FORMAT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), + TP_FMT("irq=%d handler=%s", irq, action->name) + ); + +/* + * Tracepoint for return of an interrupt handler: + */ +TRACE_EVENT(irq_handler_exit, + + TP_PROTO(int irq, struct irqaction *action, int ret), + + TP_ARGS(irq, action, ret), + + TP_STRUCT__entry( + __field( int, irq ) + __field( int, ret ) + ), + + TP_fast_assign( + __entry->irq = irq; + __entry->ret = ret; + ), + + TP_printk("irq=%d return=%s", + __entry->irq, __entry->ret ? "handled" : "unhandled") +); + +TRACE_FORMAT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +TRACE_FORMAT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), + TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) + ); + +#endif /* _TRACE_IRQ_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h new file mode 100644 index 0000000..c22c42f --- /dev/null +++ b/include/trace/events/kmem.h @@ -0,0 +1,194 @@ +#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KMEM_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + +TRACE_EVENT(kmalloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmem_cache_alloc, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags) +); + +TRACE_EVENT(kmalloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kmem_cache_alloc_node, + + TP_PROTO(unsigned long call_site, + const void *ptr, + size_t bytes_req, + size_t bytes_alloc, + gfp_t gfp_flags, + int node), + + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( size_t, bytes_req ) + __field( size_t, bytes_alloc ) + __field( gfp_t, gfp_flags ) + __field( int, node ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->bytes_req = bytes_req; + __entry->bytes_alloc = bytes_alloc; + __entry->gfp_flags = gfp_flags; + __entry->node = node; + ), + + TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", + __entry->call_site, + __entry->ptr, + __entry->bytes_req, + __entry->bytes_alloc, + __entry->gfp_flags, + __entry->node) +); + +TRACE_EVENT(kfree, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); + +TRACE_EVENT(kmem_cache_free, + + TP_PROTO(unsigned long call_site, const void *ptr), + + TP_ARGS(call_site, ptr), + + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + ), + + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + ), + + TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) +); +#endif /* _TRACE_KMEM_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h new file mode 100644 index 0000000..45e326b --- /dev/null +++ b/include/trace/events/lockdep.h @@ -0,0 +1,60 @@ +#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCKDEP_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + +#ifdef CONFIG_LOCKDEP + +TRACE_FORMAT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + TP_FMT("%s%s%s", trylock ? "try " : "", + read ? "read " : "", lock->name) + ); + +TRACE_FORMAT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), + TP_FMT("%s", lock->name) + ); + +#ifdef CONFIG_LOCK_STAT + +TRACE_FORMAT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), + TP_FMT("%s", lock->name) + ); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __field(const char *, name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __entry->name = lock->name; + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCKDEP_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h new file mode 100644 index 0000000..ffa1cab --- /dev/null +++ b/include/trace/events/sched.h @@ -0,0 +1,339 @@ +#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SCHED_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched + +/* + * Tracepoint for calling kthread_stop, performed to end a kthread: + */ +TRACE_EVENT(sched_kthread_stop, + + TP_PROTO(struct task_struct *t), + + TP_ARGS(t), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, t->comm, TASK_COMM_LEN); + __entry->pid = t->pid; + ), + + TP_printk("task %s:%d", __entry->comm, __entry->pid) +); + +/* + * Tracepoint for the return value of the kthread stopping: + */ +TRACE_EVENT(sched_kthread_stop_ret, + + TP_PROTO(int ret), + + TP_ARGS(ret), + + TP_STRUCT__entry( + __field( int, ret ) + ), + + TP_fast_assign( + __entry->ret = ret; + ), + + TP_printk("ret %d", __entry->ret) +); + +/* + * Tracepoint for waiting on task to unschedule: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wait_task, + + TP_PROTO(struct rq *rq, struct task_struct *p), + + TP_ARGS(rq, p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for waking up a task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for waking up a new task: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_wakeup_new, + + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + + TP_ARGS(rq, p, success), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, success ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->success = success; + ), + + TP_printk("task %s:%d [%d] success=%d", + __entry->comm, __entry->pid, __entry->prio, + __entry->success) +); + +/* + * Tracepoint for task switches, performed by the scheduler: + * + * (NOTE: the 'rq' argument is not used by generic trace events, + * but used by the latency tracer plugin. ) + */ +TRACE_EVENT(sched_switch, + + TP_PROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(rq, prev, next), + + TP_STRUCT__entry( + __array( char, prev_comm, TASK_COMM_LEN ) + __field( pid_t, prev_pid ) + __field( int, prev_prio ) + __array( char, next_comm, TASK_COMM_LEN ) + __field( pid_t, next_pid ) + __field( int, next_prio ) + ), + + TP_fast_assign( + memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); + __entry->prev_pid = prev->pid; + __entry->prev_prio = prev->prio; + memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("task %s:%d [%d] ==> %s:%d [%d]", + __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, + __entry->next_comm, __entry->next_pid, __entry->next_prio) +); + +/* + * Tracepoint for a task being migrated: + */ +TRACE_EVENT(sched_migrate_task, + + TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + + TP_ARGS(p, orig_cpu, dest_cpu), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + __field( int, orig_cpu ) + __field( int, dest_cpu ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->orig_cpu = orig_cpu; + __entry->dest_cpu = dest_cpu; + ), + + TP_printk("task %s:%d [%d] from: %d to: %d", + __entry->comm, __entry->pid, __entry->prio, + __entry->orig_cpu, __entry->dest_cpu) +); + +/* + * Tracepoint for freeing a task: + */ +TRACE_EVENT(sched_process_free, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a task exiting: + */ +TRACE_EVENT(sched_process_exit, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for a waiting task: + */ +TRACE_EVENT(sched_process_wait, + + TP_PROTO(struct pid *pid), + + TP_ARGS(pid), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( int, prio ) + ), + + TP_fast_assign( + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + __entry->pid = pid_nr(pid); + __entry->prio = current->prio; + ), + + TP_printk("task %s:%d [%d]", + __entry->comm, __entry->pid, __entry->prio) +); + +/* + * Tracepoint for do_fork: + */ +TRACE_EVENT(sched_process_fork, + + TP_PROTO(struct task_struct *parent, struct task_struct *child), + + TP_ARGS(parent, child), + + TP_STRUCT__entry( + __array( char, parent_comm, TASK_COMM_LEN ) + __field( pid_t, parent_pid ) + __array( char, child_comm, TASK_COMM_LEN ) + __field( pid_t, child_pid ) + ), + + TP_fast_assign( + memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); + __entry->parent_pid = parent->pid; + memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); + __entry->child_pid = child->pid; + ), + + TP_printk("parent %s:%d child %s:%d", + __entry->parent_comm, __entry->parent_pid, + __entry->child_comm, __entry->child_pid) +); + +/* + * Tracepoint for sending a signal: + */ +TRACE_EVENT(sched_signal_send, + + TP_PROTO(int sig, struct task_struct *p), + + TP_ARGS(sig, p), + + TP_STRUCT__entry( + __field( int, sig ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->sig = sig; + ), + + TP_printk("sig: %d task %s:%d", + __entry->sig, __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SCHED_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h new file mode 100644 index 0000000..1e8fabb --- /dev/null +++ b/include/trace/events/skb.h @@ -0,0 +1,40 @@ +#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SKB_H + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + +/* + * Tracepoint for free an sk_buff: + */ +TRACE_EVENT(kfree_skb, + + TP_PROTO(struct sk_buff *skb, void *location), + + TP_ARGS(skb, location), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned short, protocol ) + __field( void *, location ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + if (skb) { + __entry->protocol = ntohs(skb->protocol); + } + __entry->location = location; + ), + + TP_printk("skbaddr=%p protocol=%u location=%p", + __entry->skbaddr, __entry->protocol, __entry->location) +); + +#endif /* _TRACE_SKB_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/irq.h b/include/trace/irq.h deleted file mode 100644 index 75e3468..0000000 --- a/include/trace/irq.h +++ /dev/null @@ -1,57 +0,0 @@ -#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_IRQ_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); - -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled") -); - -TRACE_FORMAT(softirq_entry, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -TRACE_FORMAT(softirq_exit, - TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); - -#endif /* _TRACE_IRQ_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/kmem.h b/include/trace/kmem.h deleted file mode 100644 index c22c42f..0000000 --- a/include/trace/kmem.h +++ /dev/null @@ -1,194 +0,0 @@ -#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_KMEM_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); -#endif /* _TRACE_KMEM_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h deleted file mode 100644 index 45e326b..0000000 --- a/include/trace/lockdep.h +++ /dev/null @@ -1,60 +0,0 @@ -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#ifdef CONFIG_LOCKDEP - -TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); - -TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); - -#ifdef CONFIG_LOCK_STAT - -TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __field(const char *, name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __entry->name = lock->name; - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/sched.h b/include/trace/sched.h deleted file mode 100644 index ffa1cab..0000000 --- a/include/trace/sched.h +++ /dev/null @@ -1,339 +0,0 @@ -#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SCHED_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ret = ret; - ), - - TP_printk("ret %d", __entry->ret) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) - ), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) - ), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid) -); - -#endif /* _TRACE_SCHED_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/skb.h b/include/trace/skb.h deleted file mode 100644 index 1e8fabb..0000000 --- a/include/trace/skb.h +++ /dev/null @@ -1,40 +0,0 @@ -#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_SKB_H - -#include -#include - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - -/* - * Tracepoint for free an sk_buff: - */ -TRACE_EVENT(kfree_skb, - - TP_PROTO(struct sk_buff *skb, void *location), - - TP_ARGS(skb, location), - - TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( unsigned short, protocol ) - __field( void *, location ) - ), - - TP_fast_assign( - __entry->skbaddr = skb; - if (skb) { - __entry->protocol = ntohs(skb->protocol); - } - __entry->location = location; - ), - - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) -); - -#endif /* _TRACE_SKB_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/exit.c b/kernel/exit.c index 2fe9d2c..cab535c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 4bebf26..085f73e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include @@ -71,6 +70,8 @@ #include #include +#include + /* * Protected counters by write_lock_irq(&tasklist_lock) */ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 983d8be..37c6363 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -20,7 +20,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include #include "internals.h" diff --git a/kernel/kthread.c b/kernel/kthread.c index e1c7692..41c88fe 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #define KTHREAD_NICE_LEVEL (-5) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 257f21a..47b201e 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -48,7 +48,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; diff --git a/kernel/sched.c b/kernel/sched.c index e6d4518..9f7ffd0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -79,7 +79,7 @@ #include "sched_cpupri.h" #define CREATE_TRACE_POINTS -#include +#include /* * Convert user-nice values [ -20 ... 0 ... 19 ] diff --git a/kernel/signal.c b/kernel/signal.c index 1d5703f..94ec0a4 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/kernel/softirq.c b/kernel/softirq.c index a2d9b45..7ab9dfd 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8e6a0b5..a234889 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 9d8cccd..a98106d 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 5bc00e8f..b8b13c5 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "trace.h" diff --git a/mm/util.c b/mm/util.c index 0e74a22..6794a33 100644 --- a/mm/util.c +++ b/mm/util.c @@ -7,7 +7,7 @@ #include #define CREATE_TRACE_POINTS -#include +#include /** * kstrdup - allocate space for and copy an existing string diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 9fd0dc3..b75b6ce 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 8017720..499a67e 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -24,6 +24,6 @@ #include #define CREATE_TRACE_POINTS -#include +#include EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356c..12806b8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include "kmap_skb.h" -- cgit v0.10.2 From 9cfe06f8cd5c8c3ad6ab323973e87dde670642b8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 14 Apr 2009 21:37:03 -0400 Subject: tracing/events: add trace-events-sample This patch adds a sample to the samples directory on how to create and use TRACE_EVENT trace points. Signed-off-by: Steven Rostedt diff --git a/samples/Kconfig b/samples/Kconfig index 4b02f5a..93f41c0 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -19,6 +19,13 @@ config SAMPLE_TRACEPOINTS help This build tracepoints example modules. +config SAMPLE_TRACE_EVENTS + tristate "Build trace_events examples" + depends on EVENT_TRACING + default m + help + This build trace event example modules. + config SAMPLE_KOBJECT tristate "Build kobject examples" help diff --git a/samples/Makefile b/samples/Makefile index 10eaca8..13e4b47 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ +obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile new file mode 100644 index 0000000..06c6dea --- /dev/null +++ b/samples/trace_events/Makefile @@ -0,0 +1,8 @@ +# builds the trace events example kernel modules; +# then to use one (as root): insmod + +PWD := $(shell pwd) + +CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/ + +obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c new file mode 100644 index 0000000..f33b3ba --- /dev/null +++ b/samples/trace_events/trace-events-sample.c @@ -0,0 +1,56 @@ +#include +#include + +/* + * Any file that uses trace points, must include the header. + * But only one file, must include the header by defining + * CREATE_TRACE_POINTS first. This will make the C code that + * creates the handles for the trace points. + */ +#define CREATE_TRACE_POINTS +#include "trace-events-sample.h" + + +static void simple_thread_func(int cnt) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + trace_foo_bar("hello", cnt); + + if (!(cnt % 10)) + /* It is really important that I say "hi!" */ + printk(KERN_EMERG "hi!\n"); +} + +static int simple_thread(void *arg) +{ + int cnt = 0; + + while (!kthread_should_stop()) + simple_thread_func(cnt++); + + return 0; +} + +static struct task_struct *simple_tsk; + +static int __init trace_event_init(void) +{ + simple_tsk = kthread_run(simple_thread, NULL, "event-sample"); + if (IS_ERR(simple_tsk)) + return -1; + + return 0; +} + +static void __exit trace_event_exit(void) +{ + kthread_stop(simple_tsk); +} + +module_init(trace_event_init); +module_exit(trace_event_exit); + +MODULE_AUTHOR("Steven Rostedt"); +MODULE_DESCRIPTION("trace-events-sample"); +MODULE_LICENSE("GPL"); diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h new file mode 100644 index 0000000..eab4644 --- /dev/null +++ b/samples/trace_events/trace-events-sample.h @@ -0,0 +1,124 @@ +/* + * Notice that this file is not protected like a normal header. + * We also must allow for rereading of this file. The + * + * || defined(TRACE_HEADER_MULTI_READ) + * + * serves this purpose. + */ +#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EVENT_SAMPLE_H + +/* + * All trace headers should include tracepoint.h, until we finally + * make it into a standard header. + */ +#include + +/* + * If TRACE_SYSTEM is defined, that will be the directory created + * in the ftrace directory under /debugfs/tracing/events/ + * + * The define_trace.h belowe will also look for a file name of + * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here. + * + * If you want a different system than file name, you can override + * the header name by defining TRACE_INCLUDE_FILE + * + * If this file was called, goofy.h, then we would define: + * + * #define TRACE_INCLUDE_FILE goofy + * + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM trace-events-sample + +/* + * The TRACE_EVENT macro is broken up into 5 parts. + * + * name: name of the trace point. This is also how to enable the tracepoint. + * A function called trace_foo_bar() will be created. + * + * proto: the prototype of the function trace_foo_bar() + * Here it is trace_foo_bar(char *foo, int bar). + * + * args: must match the arguments in the prototype. + * Here it is simply "foo, bar". + * + * struct: This defines the way the data will be stored in the ring buffer. + * There are currently two types of elements. __field and __array. + * a __field is broken up into (type, name). Where type can be any + * type but an array. + * For an array. there are three fields. (type, name, size). The + * type of elements in the array, the name of the field and the size + * of the array. + * + * __array( char, foo, 10) is the same as saying char foo[10]. + * + * fast_assign: This is a C like function that is used to store the items + * into the ring buffer. + * + * printk: This is a way to print out the data in pretty print. This is + * useful if the system crashes and you are logging via a serial line, + * the data can be printed to the console using this "printk" method. + * + * Note, that for both the assign and the printk, __entry is the handler + * to the data structure in the ring buffer, and is defined by the + * TP_STRUCT__entry. + */ +TRACE_EVENT(foo_bar, + + TP_PROTO(char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_STRUCT__entry( + __array( char, foo, 10 ) + __field( int, bar ) + ), + + TP_fast_assign( + strncpy(__entry->foo, foo, 10); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __entry->foo, __entry->bar) +); +#endif + +/***** NOTICE! The #if protection ends here. *****/ + + +/* + * There are several ways I could have done this. If I left out the + * TRACE_INCLUDE_PATH, then it would default to the kernel source + * include/trace/events directory. + * + * I could specify a path from the define_trace.h file back to this + * file. + * + * #define TRACE_INCLUDE_PATH ../../samples/trace_events + * + * But I chose to simply make it use the current directory and then in + * the Makefile I added: + * + * CFLAGS_trace-events-sample.o := -I$(PWD)/samples/trace_events/ + * + * This will make sure the current path is part of the include + * structure for our file so that we can find it. + * + * I could have made only the top level directory the include: + * + * CFLAGS_trace-events-sample.o := -I$(PWD) + * + * And then let the path to this directory be the TRACE_INCLUDE_PATH: + * + * #define TRACE_INCLUDE_PATH samples/trace_events + * + * But then if something defines "samples" or "trace_events" then we + * could risk that being converted too, and give us an unexpected + * result. + */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include -- cgit v0.10.2 From 05725f7eb4b8acb147c5fc7b91397b1f6bcab00d Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 14 Apr 2009 20:17:16 +0200 Subject: rculist: use list_entry_rcu in places where it's appropriate Use previously introduced list_entry_rcu instead of an open-coded list_entry + rcu_dereference combination. Signed-off-by: Jiri Pirko Reviewed-by: Paul E. McKenney Cc: dipankar@in.ibm.com LKML-Reference: <20090414181715.GA3634@psychotron.englab.brq.redhat.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc..886df41 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -77,6 +77,7 @@ struct sched_param { #include #include #include +#include #include #include @@ -2010,7 +2011,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, } #endif -#define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) +#define next_task(p) \ + list_entry_rcu((p)->tasks.next, struct task_struct, tasks) #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) @@ -2049,8 +2051,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2) static inline struct task_struct *next_thread(const struct task_struct *p) { - return list_entry(rcu_dereference(p->thread_group.next), - struct task_struct, thread_group); + return list_entry_rcu(p->thread_group.next, + struct task_struct, thread_group); } static inline int thread_group_empty(struct task_struct *p) diff --git a/ipc/sem.c b/ipc/sem.c index 16a2189..87c2b64 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk) int i; rcu_read_lock(); - un = list_entry(rcu_dereference(ulp->list_proc.next), - struct sem_undo, list_proc); + un = list_entry_rcu(ulp->list_proc.next, + struct sem_undo, list_proc); if (&un->list_proc == &ulp->list_proc) semid = -1; else diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index ffbe259..510186f 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -84,8 +84,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos) * against concurrent list-extension */ rcu_read_lock(); - qe = list_entry(rcu_dereference(qe->later.next), - struct ima_queue_entry, later); + qe = list_entry_rcu(qe->later.next, + struct ima_queue_entry, later); rcu_read_unlock(); (*pos)++; diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index e03a7e1..11d2cb1 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -734,8 +734,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) return; } - m = list_entry(rcu_dereference(smk_netlbladdr_list.next), - struct smk_netlbladdr, list); + m = list_entry_rcu(smk_netlbladdr_list.next, + struct smk_netlbladdr, list); /* the comparison '>' is a bit hacky, but works */ if (new->smk_mask.s_addr > m->smk_mask.s_addr) { @@ -748,8 +748,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new) list_add_rcu(&new->list, &m->list); return; } - m_next = list_entry(rcu_dereference(m->list.next), - struct smk_netlbladdr, list); + m_next = list_entry_rcu(m->list.next, + struct smk_netlbladdr, list); if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) { list_add_rcu(&new->list, &m->list); return; -- cgit v0.10.2 From b206525ad1f653b7da35f5827be93770d28eae11 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 14 Apr 2009 23:04:37 +0530 Subject: x86: k8 convert node_to_k8_nb_misc() from a macro to an inline function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Converting node_to_k8_nb_misc() from a macro to an inline function makes compiler see the 'node' parameter in the !CONFIG_K8_NB too, which eliminates these compiler warnings: arch/x86/kernel/cpu/intel_cacheinfo.c: In function ‘show_cache_disable’: arch/x86/kernel/cpu/intel_cacheinfo.c:712: warning: unused variable ‘node’ arch/x86/kernel/cpu/intel_cacheinfo.c: In function ‘store_cache_disable’: arch/x86/kernel/cpu/intel_cacheinfo.c:739: warning: unused variable ‘node’ Signed-off-by: Jaswinder Singh Rajput Cc: Andreas Herrmann Cc: Mark Langsdorf LKML-Reference: <1239730477.2966.26.camel@ht.satnam> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index c23b3d1..c2d1f3b 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -13,10 +13,15 @@ extern void k8_flush_garts(void); extern int k8_scan_nodes(unsigned long start, unsigned long end); #ifdef CONFIG_K8_NB -#define node_to_k8_nb_misc(node) \ - (node < num_k8_northbridges) ? k8_northbridges[node] : NULL +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; +} #else -#define node_to_k8_nb_misc(node) NULL +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return NULL; +} #endif -- cgit v0.10.2 From e6a1a89d572c31b62d6dcf11a371c7323852d9b2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 15 Apr 2009 18:22:41 +0900 Subject: dma-debug: add dma_debug_resize_entries() to adjust the number of dma_debug_entries We use a static value for the number of dma_debug_entries. It can be overwritten by a kernel command line option. Some IOMMUs (e.g. GART) can't set an appropriate value by a kernel command line option because they can't know such value until they finish initializing up their hardware. This patch adds dma_debug_resize_entries() enables IOMMUs to adjust the number of dma_debug_entries anytime. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Cc: fujita.tomonori@lab.ntt.co.jp Cc: akpm@linux-foundation.org LKML-Reference: <20090415182234R.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h index 28d53cb..171ad8a 100644 --- a/include/linux/dma-debug.h +++ b/include/linux/dma-debug.h @@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus); extern void dma_debug_init(u32 num_entries); +extern int dma_debug_resize_entries(u32 num_entries); + extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, @@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries) { } +static inline int dma_debug_resize_entries(u32 num_entries) +{ + return 0; +} + static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr, diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d3da7ed..5d61019 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -85,6 +85,7 @@ static u32 show_num_errors = 1; static u32 num_free_entries; static u32 min_free_entries; +static u32 nr_total_entries; /* number of preallocated entries requested by kernel cmdline */ static u32 req_entries; @@ -257,6 +258,21 @@ static void add_dma_entry(struct dma_debug_entry *entry) put_hash_bucket(bucket, &flags); } +static struct dma_debug_entry *__dma_entry_alloc(void) +{ + struct dma_debug_entry *entry; + + entry = list_entry(free_entries.next, struct dma_debug_entry, list); + list_del(&entry->list); + memset(entry, 0, sizeof(*entry)); + + num_free_entries -= 1; + if (num_free_entries < min_free_entries) + min_free_entries = num_free_entries; + + return entry; +} + /* struct dma_entry allocator * * The next two functions implement the allocator for @@ -276,9 +292,7 @@ static struct dma_debug_entry *dma_entry_alloc(void) goto out; } - entry = list_entry(free_entries.next, struct dma_debug_entry, list); - list_del(&entry->list); - memset(entry, 0, sizeof(*entry)); + entry = __dma_entry_alloc(); #ifdef CONFIG_STACKTRACE entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; @@ -286,9 +300,6 @@ static struct dma_debug_entry *dma_entry_alloc(void) entry->stacktrace.skip = 2; save_stack_trace(&entry->stacktrace); #endif - num_free_entries -= 1; - if (num_free_entries < min_free_entries) - min_free_entries = num_free_entries; out: spin_unlock_irqrestore(&free_entries_lock, flags); @@ -310,6 +321,53 @@ static void dma_entry_free(struct dma_debug_entry *entry) spin_unlock_irqrestore(&free_entries_lock, flags); } +int dma_debug_resize_entries(u32 num_entries) +{ + int i, delta, ret = 0; + unsigned long flags; + struct dma_debug_entry *entry; + LIST_HEAD(tmp); + + spin_lock_irqsave(&free_entries_lock, flags); + + if (nr_total_entries < num_entries) { + delta = num_entries - nr_total_entries; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + for (i = 0; i < delta; i++) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + break; + + list_add_tail(&entry->list, &tmp); + } + + spin_lock_irqsave(&free_entries_lock, flags); + + list_splice(&tmp, &free_entries); + nr_total_entries += i; + num_free_entries += i; + } else { + delta = nr_total_entries - num_entries; + + for (i = 0; i < delta && !list_empty(&free_entries); i++) { + entry = __dma_entry_alloc(); + kfree(entry); + } + + nr_total_entries -= i; + } + + if (nr_total_entries != num_entries) + ret = 1; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_debug_resize_entries); + /* * DMA-API debugging init code * @@ -490,6 +548,8 @@ void dma_debug_init(u32 num_entries) return; } + nr_total_entries = num_free_entries; + printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); } -- cgit v0.10.2 From 19c1a6f5764d787113fa323ffb18be7991208f82 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 14 Apr 2009 09:43:19 +0900 Subject: x86 gart: reimplement IOMMU_LEAK feature by using DMA_API_DEBUG IOMMU_LEAK, GART's own feature, dumps the used IOMMU entries when IOMMU entries is full, which might be useful to find a bad driver that eats IOMMU entries. DMA_API_DEBUG provides the similar feature, debug_dma_dump_mappings, and it's better than GART's IOMMU_LEAK feature. GART's IOMMU_LEAK feature doesn't say who uses IOMMU entries so it's hard to find a bad driver. This patch reimplements the GART's IOMMU_LEAK feature by using DMA_API_DEBUG. Signed-off-by: FUJITA Tomonori Acked-by: Joerg Roedel Cc: Andrew Morton LKML-Reference: <1239669799-23579-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d8359e7..5865712 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -161,8 +161,7 @@ config IOMMU_DEBUG config IOMMU_LEAK bool "IOMMU leak tracing" - depends on DEBUG_KERNEL - depends on IOMMU_DEBUG + depends on IOMMU_DEBUG && DMA_API_DEBUG ---help--- Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b284b58..1e8920d 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -144,48 +144,21 @@ static void flush_gart(void) } #ifdef CONFIG_IOMMU_LEAK - -#define SET_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = __builtin_return_address(0);\ - } while (0) - -#define CLEAR_LEAK(x) \ - do { \ - if (iommu_leak_tab) \ - iommu_leak_tab[x] = NULL; \ - } while (0) - /* Debugging aid for drivers that don't free their IOMMU tables */ -static void **iommu_leak_tab; static int leak_trace; static int iommu_leak_pages = 20; static void dump_leak(void) { - int i; static int dump; - if (dump || !iommu_leak_tab) + if (dump) return; dump = 1; - show_stack(NULL, NULL); - /* Very crude. dump some from the end of the table too */ - printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", - iommu_leak_pages); - for (i = 0; i < iommu_leak_pages; i += 2) { - printk(KERN_DEBUG "%lu: ", iommu_pages-i); - printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], - 0); - printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); - } - printk(KERN_DEBUG "\n"); + show_stack(NULL, NULL); + debug_dma_dump_mappings(NULL); } -#else -# define SET_LEAK(x) -# define CLEAR_LEAK(x) #endif static void iommu_full(struct device *dev, size_t size, int dir) @@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); - SET_LEAK(iommu_page + i); phys_mem += PAGE_SIZE; } return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); @@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; - CLEAR_LEAK(iommu_page + i); } free_iommu(iommu_page, npages); } @@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); - SET_LEAK(iommu_page); addr += PAGE_SIZE; iommu_page++; } @@ -801,11 +771,12 @@ void __init gart_iommu_init(void) #ifdef CONFIG_IOMMU_LEAK if (leak_trace) { - iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, - get_order(iommu_pages*sizeof(void *))); - if (!iommu_leak_tab) + int ret; + + ret = dma_debug_resize_entries(iommu_pages); + if (ret) printk(KERN_DEBUG - "PCI-DMA: Cannot allocate leak trace area\n"); + "PCI-DMA: Cannot trace all the entries\n"); } #endif -- cgit v0.10.2 From 13318a7186d8e0ae08c996ea4111a945e7789772 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 Apr 2009 09:59:10 +0800 Subject: sched: use group_first_cpu() instead of cpumask_first(sched_group_cpus()) Impact: cleanup This patch changes cpumask_first(sched_group_cpus()) to group_first_cpu() for maintainability. Signed-off-by: Miao Xie Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 92b4b56..7601cee 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7995,7 +7995,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head) struct sched_domain *sd; sd = &per_cpu(phys_domains, j).sd; - if (j != cpumask_first(sched_group_cpus(sd->groups))) { + if (j != group_first_cpu(sd->groups)) { /* * Only add "power" once for each * physical package. @@ -8073,7 +8073,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) WARN_ON(!sd || !sd->groups); - if (cpu != cpumask_first(sched_group_cpus(sd->groups))) + if (cpu != group_first_cpu(sd->groups)) return; child = sd->child; -- cgit v0.10.2 From abe5fa7899fb5809ddc7336d8dd0edd5b2b96665 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 15 Apr 2009 20:45:03 +0800 Subject: crypto: eseqiv - Fix IV generation for sync algorithms If crypto_ablkcipher_encrypt() returns synchronous, eseqiv_complete2() is called even if req->giv is already the pointer to the generated IV. The generated IV is overwritten with some random data in this case. This patch fixes this by calling eseqiv_complete2() just if the generated IV has to be copied to req->giv. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c index 2a342c8..3ca3b66 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -153,7 +153,8 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) if (err) goto out; - eseqiv_complete2(req); + if (giv != req->giv) + eseqiv_complete2(req); out: return err; -- cgit v0.10.2 From 77857dc07247ed5fa700a197c96ef842d8dbebdf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 15 Apr 2009 11:57:01 -0700 Subject: x86: use used_vectors in init_IRQ() Impact: fix crash with many devices I found this crash: [ 552.616646] general protection fault: 0403 [#1] SMP [ 552.620013] last sysfs file: /sys/devices/pci0000:00/0000:00:02.0/usb1/1-1/1-1:1.0/host13/target13:0:0/13:0:0:0/block/sr0/size [ 552.620013] CPU 0 [ 552.620013] Modules linked in: [ 552.620013] Pid: 0, comm: swapper Not tainted 2.6.30-rc1-tip-01931-g8fcafd8-dirty #28 Sun Fire X4440 [ 552.620013] RIP: 0010:[] [] default_idle+0x7d/0xda [ 552.620013] RSP: 0018:ffffffff81345e68 EFLAGS: 00010246 [ 552.620013] RAX: 0000000000000000 RBX: ffffffff8133d870 RCX: ffffc20000000000 [ 552.620013] RDX: 00000000001d0620 RSI: ffffffff8023bad8 RDI: ffffffff802a3169 [ 552.620013] RBP: ffffffff81345e98 R08: 0000000000000000 R09: ffffffff812244a0 [ 552.620013] R10: ffffffff81345dc8 R11: 7ebe1b6fa0bcac50 R12: 4ec4ec4ec4ec4ec5 [ 552.620013] R13: ffffffff813a54d0 R14: ffffffff813a7a40 R15: 0000000000000000 [ 552.620013] FS: 00000000006d1880(0000) GS:ffffc20000000000(0000) knlGS:0000000000000000 [ 552.620013] CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b [ 552.620013] CR2: 00007fec9d936a50 CR3: 000000007d1a9000 CR4: 00000000000006e0 [ 552.620013] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 552.620013] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 552.620013] Process swapper (pid: 0, threadinfo ffffffff81344000,task ffffffff812244a0) [ 552.620013] Stack: [ 552.620013] 0000000000000000 ffffc20000000000 00000000001d0620 7ebe1b6fa0bcac50 [ 552.620013] ffffffff8133d870 4ec4ec4ec4ec4ec5 ffffffff81345ec8 ffffffff8023bd84 [ 552.620013] 4ec4ec4ec4ec4ec5 ffffffff813a54d0 7ebe1b6fa0bcac50 ffffffff8133d870 [ 552.620013] Call Trace: [ 552.620013] [] c1e_idle+0x109/0x124 [ 552.620013] [] cpu_idle+0xb8/0x101 [ 552.620013] [] rest_init+0x7e/0x94 [ 552.620013] [] start_kernel+0x3dc/0x3fd [ 552.620013] [] x86_64_start_reservations+0xb9/0xd4 [ 552.620013] [] x86_64_start_kernel+0xee/0x109 [ 552.620013] Code: 48 8b 04 25 f8 b4 00 00 83 a0 3c e0 ff ff fb 0f ae f0 65 48 8b 04 25 f8 b4 00 00 f6 80 38 e0 ff ff 08 75 09 e8 71 76 06 00 fb f4 06 e8 68 76 06 00 fb 65 48 8b 04 25 f8 b4 00 00 83 88 3c e0 [ 552.620013] RIP [] default_idle+0x7d/0xda [ 552.620013] RSP [ 552.828646] ---[ end trace 4cbfc5c01382af7f ]--- Joerg Roedel said "The 0403 error code means that there was an external interrupt with vector 0x80. Yinghai, my theory is that the kernel on this machine has no 32bit emulation compiled in, right? In this case the selector points to a zero entry which may cause the #gpf right after the hlt. But I have no idea where the external int 0x80 comes from" it turns out that we could use 0x80 for external device on 64-bit when 32-bit emulation is disabled. But we forgot to set the gate for it. try to set gate for it by checking used_vectors. Also move apic_intr_init() early to avoid setting that gate two times. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Joerg Roedel LKML-Reference: <49E62DFD.6010904@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index b424c32..2e08b10 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -240,19 +240,19 @@ void __init native_init_IRQ(void) /* Execute any quirks before the call gates are initialised: */ x86_quirk_pre_intr_init(); + apic_intr_init(); + /* * Cover the whole vector space, no vector can escape * us. (some of these will be overridden and become * 'special' SMP interrupts) */ for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { - /* IA32_SYSCALL_VECTOR was reserved in trap_init. */ - if (i != IA32_SYSCALL_VECTOR) + /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ + if (!test_bit(i, used_vectors)) set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); } - apic_intr_init(); - if (!acpi_ioapic) setup_irq(2, &irq2); -- cgit v0.10.2 From ff7b1b4f000cea84f071c1b6aa2918b2119d66f1 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 15 Apr 2009 16:55:05 +0100 Subject: perfcounters: export perf_tpcounter_event Needed for modular tracepoint support. Signed-off-by: Steven Whitehouse Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 7f9521c..0939609 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2590,6 +2590,7 @@ void perf_tpcounter_event(int event_id) __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); } +EXPORT_SYMBOL_GPL(perf_tpcounter_event); extern int ftrace_profile_enable(int); extern void ftrace_profile_disable(int); -- cgit v0.10.2 From 0207a2efb43d81e29e23662b5d035945688a103f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 16 Apr 2009 14:40:56 +0900 Subject: sh: Add support for SH7724 (SH-Mobile R2R) CPU subtype. This implements initial support for the SH-Mobile R2R CPU. Based on Rev 0.11 of the initial SH7724 hardware manual. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index e7390dd..505d1ac 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -347,6 +347,15 @@ config CPU_SUBTYPE_SH7723 help Select SH7723 if you have an SH-MobileR2 CPU. +config CPU_SUBTYPE_SH7724 + bool "Support SH7724 processor" + select CPU_SH4A + select CPU_SHX2 + select ARCH_SPARSEMEM_ENABLE + select SYS_SUPPORTS_CMT + help + Select SH7724 if you have an SH-MobileR2R CPU. + config CPU_SUBTYPE_SH7763 bool "Support SH7763 processor" select CPU_SH4A @@ -495,6 +504,7 @@ config SH_PCLK_FREQ CPU_SUBTYPE_SH7203 || CPU_SUBTYPE_SH7206 || \ CPU_SUBTYPE_SH7263 || CPU_SUBTYPE_MXG || \ CPU_SUBTYPE_SH7786 + default "41666666" if CPU_SUBTYPE_SH7724 default "60000000" if CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R default "66000000" if CPU_SUBTYPE_SH4_202 default "50000000" diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 1fd58b4..005c962 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -32,7 +32,7 @@ enum cpu_type { /* SH-4A types */ CPU_SH7763, CPU_SH7770, CPU_SH7780, CPU_SH7781, CPU_SH7785, CPU_SH7786, - CPU_SH7723, CPU_SHX3, + CPU_SH7723, CPU_SH7724, CPU_SHX3, /* SH4AL-DSP types */ CPU_SH7343, CPU_SH7722, CPU_SH7366, diff --git a/arch/sh/include/cpu-sh4/cpu/freq.h b/arch/sh/include/cpu-sh4/cpu/freq.h index 749d1c4..ccf1d99 100644 --- a/arch/sh/include/cpu-sh4/cpu/freq.h +++ b/arch/sh/include/cpu-sh4/cpu/freq.h @@ -25,6 +25,24 @@ #elif defined(CONFIG_CPU_SUBTYPE_SH7763) || \ defined(CONFIG_CPU_SUBTYPE_SH7780) #define FRQCR 0xffc80000 +#elif defined(CONFIG_CPU_SUBTYPE_SH7724) +#define FRQCRA 0xa4150000 +#define FRQCRB 0xa4150004 +#define VCLKCR 0xa4150048 + +#define FCLKACR 0xa4150008 +#define FCLKBCR 0xa415000c +#define FRQCR FRQCRA +#define SCLKACR FCLKACR +#define SCLKBCR FCLKBCR +#define FCLKACR 0xa4150008 +#define FCLKBCR 0xa415000c +#define IrDACLKCR 0xa4150018 + +#define MSTPCR0 0xa4150030 +#define MSTPCR1 0xa4150034 +#define MSTPCR2 0xa4150038 + #elif defined(CONFIG_CPU_SUBTYPE_SH7785) #define FRQCR0 0xffc80000 #define FRQCR1 0xffc80004 diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h new file mode 100644 index 0000000..34605c9 --- /dev/null +++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h @@ -0,0 +1,255 @@ +#ifndef __ASM_SH7724_H__ +#define __ASM_SH7724_H__ + +enum { + /* PTA */ + GPIO_PTA7, GPIO_PTA6, GPIO_PTA5, GPIO_PTA4, + GPIO_PTA3, GPIO_PTA2, GPIO_PTA1, GPIO_PTA0, + + /* PTB */ + GPIO_PTB7, GPIO_PTB6, GPIO_PTB5, GPIO_PTB4, + GPIO_PTB3, GPIO_PTB2, GPIO_PTB1, GPIO_PTB0, + + /* PTC */ + GPIO_PTC7, GPIO_PTC6, GPIO_PTC5, GPIO_PTC4, + GPIO_PTC3, GPIO_PTC2, GPIO_PTC1, GPIO_PTC0, + + /* PTD */ + GPIO_PTD7, GPIO_PTD6, GPIO_PTD5, GPIO_PTD4, + GPIO_PTD3, GPIO_PTD2, GPIO_PTD1, GPIO_PTD0, + + /* PTE */ + GPIO_PTE7, GPIO_PTE6, GPIO_PTE5, GPIO_PTE4, + GPIO_PTE3, GPIO_PTE2, GPIO_PTE1, GPIO_PTE0, + + /* PTF */ + GPIO_PTF7, GPIO_PTF6, GPIO_PTF5, GPIO_PTF4, + GPIO_PTF3, GPIO_PTF2, GPIO_PTF1, GPIO_PTF0, + + /* PTG */ + GPIO_PTG5, GPIO_PTG4, + GPIO_PTG3, GPIO_PTG2, GPIO_PTG1, GPIO_PTG0, + + /* PTH */ + GPIO_PTH7, GPIO_PTH6, GPIO_PTH5, GPIO_PTH4, + GPIO_PTH3, GPIO_PTH2, GPIO_PTH1, GPIO_PTH0, + + /* PTJ */ + GPIO_PTJ7, GPIO_PTJ6, GPIO_PTJ5, + GPIO_PTJ3, GPIO_PTJ2, GPIO_PTJ1, GPIO_PTJ0, + + /* PTK */ + GPIO_PTK7, GPIO_PTK6, GPIO_PTK5, GPIO_PTK4, + GPIO_PTK3, GPIO_PTK2, GPIO_PTK1, GPIO_PTK0, + + /* PTL */ + GPIO_PTL7, GPIO_PTL6, GPIO_PTL5, GPIO_PTL4, + GPIO_PTL3, GPIO_PTL2, GPIO_PTL1, GPIO_PTL0, + + /* PTM */ + GPIO_PTM7, GPIO_PTM6, GPIO_PTM5, GPIO_PTM4, + GPIO_PTM3, GPIO_PTM2, GPIO_PTM1, GPIO_PTM0, + + /* PTN */ + GPIO_PTN7, GPIO_PTN6, GPIO_PTN5, GPIO_PTN4, + GPIO_PTN3, GPIO_PTN2, GPIO_PTN1, GPIO_PTN0, + + /* PTQ */ + GPIO_PTQ7, GPIO_PTQ6, GPIO_PTQ5, GPIO_PTQ4, + GPIO_PTQ3, GPIO_PTQ2, GPIO_PTQ1, GPIO_PTQ0, + + /* PTR */ + GPIO_PTR7, GPIO_PTR6, GPIO_PTR5, GPIO_PTR4, + GPIO_PTR3, GPIO_PTR2, GPIO_PTR1, GPIO_PTR0, + + /* PTS */ + GPIO_PTS6, GPIO_PTS5, GPIO_PTS4, + GPIO_PTS3, GPIO_PTS2, GPIO_PTS1, GPIO_PTS0, + + /* PTT */ + GPIO_PTT7, GPIO_PTT6, GPIO_PTT5, GPIO_PTT4, + GPIO_PTT3, GPIO_PTT2, GPIO_PTT1, GPIO_PTT0, + + /* PTU */ + GPIO_PTU7, GPIO_PTU6, GPIO_PTU5, GPIO_PTU4, + GPIO_PTU3, GPIO_PTU2, GPIO_PTU1, GPIO_PTU0, + + /* PTV */ + GPIO_PTV7, GPIO_PTV6, GPIO_PTV5, GPIO_PTV4, + GPIO_PTV3, GPIO_PTV2, GPIO_PTV1, GPIO_PTV0, + + /* PTW */ + GPIO_PTW7, GPIO_PTW6, GPIO_PTW5, GPIO_PTW4, + GPIO_PTW3, GPIO_PTW2, GPIO_PTW1, GPIO_PTW0, + + /* PTX */ + GPIO_PTX7, GPIO_PTX6, GPIO_PTX5, GPIO_PTX4, + GPIO_PTX3, GPIO_PTX2, GPIO_PTX1, GPIO_PTX0, + + /* PTY */ + GPIO_PTY7, GPIO_PTY6, GPIO_PTY5, GPIO_PTY4, + GPIO_PTY3, GPIO_PTY2, GPIO_PTY1, GPIO_PTY0, + + /* PTZ */ + GPIO_PTZ7, GPIO_PTZ6, GPIO_PTZ5, GPIO_PTZ4, + GPIO_PTZ3, GPIO_PTZ2, GPIO_PTZ1, GPIO_PTZ0, + + /* BSC (PTA/PTB/PTJ/PTQ/PTR/PTT) */ + GPIO_FN_D31, GPIO_FN_D30, GPIO_FN_D29, GPIO_FN_D28, + GPIO_FN_D27, GPIO_FN_D26, GPIO_FN_D25, GPIO_FN_D24, + GPIO_FN_D23, GPIO_FN_D22, GPIO_FN_D21, GPIO_FN_D20, + GPIO_FN_D19, GPIO_FN_D18, GPIO_FN_D17, GPIO_FN_D16, + GPIO_FN_D15, GPIO_FN_D14, GPIO_FN_D13, GPIO_FN_D12, + GPIO_FN_D11, GPIO_FN_D10, GPIO_FN_D9, GPIO_FN_D8, + GPIO_FN_D7, GPIO_FN_D6, GPIO_FN_D5, GPIO_FN_D4, + GPIO_FN_D3, GPIO_FN_D2, GPIO_FN_D1, GPIO_FN_D0, + GPIO_FN_A25, GPIO_FN_A24, GPIO_FN_A23, GPIO_FN_A22, + GPIO_FN_CS6B_CE1B, GPIO_FN_CS6A_CE2B, + GPIO_FN_CS5B_CE1A, GPIO_FN_CS5A_CE2A, + GPIO_FN_WE3_ICIOWR, GPIO_FN_WE2_ICIORD, + GPIO_FN_IOIS16, GPIO_FN_WAIT, + GPIO_FN_BS, + + /* KEYSC (PTA/PTB)*/ + GPIO_FN_KEYOUT5_IN5, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYIN4, + GPIO_FN_KEYIN3, GPIO_FN_KEYIN2, GPIO_FN_KEYIN1, GPIO_FN_KEYIN0, + GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT2, GPIO_FN_KEYOUT1, GPIO_FN_KEYOUT0, + + /* ATAPI (PTA/PTB/PTK/PTR/PTS/PTW) */ + GPIO_FN_IDED15, GPIO_FN_IDED14, GPIO_FN_IDED13, GPIO_FN_IDED12, + GPIO_FN_IDED11, GPIO_FN_IDED10, GPIO_FN_IDED9, GPIO_FN_IDED8, + GPIO_FN_IDED7, GPIO_FN_IDED6, GPIO_FN_IDED5, GPIO_FN_IDED4, + GPIO_FN_IDED3, GPIO_FN_IDED2, GPIO_FN_IDED1, GPIO_FN_IDED0, + GPIO_FN_IDEA2, GPIO_FN_IDEA1, GPIO_FN_IDEA0, GPIO_FN_IDEIOWR, + GPIO_FN_IODREQ, GPIO_FN_IDECS0, GPIO_FN_IDECS1, GPIO_FN_IDEIORD, + GPIO_FN_DIRECTION, GPIO_FN_EXBUF_ENB, GPIO_FN_IDERST, GPIO_FN_IODACK, + GPIO_FN_IDEINT, GPIO_FN_IDEIORDY, + + /* TPU (PTB/PTR/PTS) */ + GPIO_FN_TPUTO3, GPIO_FN_TPUTO2, GPIO_FN_TPUTO1, GPIO_FN_TPUTO0, + GPIO_FN_TPUTI3, GPIO_FN_TPUTI2, + + /* LCDC (PTC/PTD/PTE/PTF/PTM/PTR) */ + GPIO_FN_LCDD23, GPIO_FN_LCDD22, GPIO_FN_LCDD21, GPIO_FN_LCDD20, + GPIO_FN_LCDD19, GPIO_FN_LCDD18, GPIO_FN_LCDD17, GPIO_FN_LCDD16, + GPIO_FN_LCDD15, GPIO_FN_LCDD14, GPIO_FN_LCDD13, GPIO_FN_LCDD12, + GPIO_FN_LCDD11, GPIO_FN_LCDD10, GPIO_FN_LCDD9, GPIO_FN_LCDD8, + GPIO_FN_LCDD7, GPIO_FN_LCDD6, GPIO_FN_LCDD5, GPIO_FN_LCDD4, + GPIO_FN_LCDD3, GPIO_FN_LCDD2, GPIO_FN_LCDD1, GPIO_FN_LCDD0, + GPIO_FN_LCDVSYN, GPIO_FN_LCDDISP, GPIO_FN_LCDRS, GPIO_FN_LCDHSYN, + GPIO_FN_LCDCS, GPIO_FN_LCDDON, GPIO_FN_LCDDCK, GPIO_FN_LCDWR, + GPIO_FN_LCDVEPWC, GPIO_FN_LCDVCPWC, GPIO_FN_LCDRD, GPIO_FN_LCDLCLK, + + /* SCIF0 (PTF/PTM) */ + GPIO_FN_SCIF0_TXD, GPIO_FN_SCIF0_RXD, GPIO_FN_SCIF0_SCK, + + /* SCIF1 (PTL) */ + GPIO_FN_SCIF1_SCK, GPIO_FN_SCIF1_RXD, GPIO_FN_SCIF1_TXD, + + /* SCIF2 (PTE/PTF/PTN) with LCDC, VOU */ + GPIO_FN_SCIF2_L_TXD, GPIO_FN_SCIF2_L_SCK, GPIO_FN_SCIF2_L_RXD, + GPIO_FN_SCIF2_V_TXD, GPIO_FN_SCIF2_V_SCK, GPIO_FN_SCIF2_V_RXD, + + /* SCIF3 (PTL/PTN/PTZ) with VOU, IRQ */ + GPIO_FN_SCIF3_V_SCK, GPIO_FN_SCIF3_V_RXD, GPIO_FN_SCIF3_V_TXD, + GPIO_FN_SCIF3_V_CTS, GPIO_FN_SCIF3_V_RTS, + GPIO_FN_SCIF3_I_SCK, GPIO_FN_SCIF3_I_RXD, GPIO_FN_SCIF3_I_TXD, + GPIO_FN_SCIF3_I_CTS, GPIO_FN_SCIF3_I_RTS, + + /* SCIF4 (PTE) */ + GPIO_FN_SCIF4_SCK, GPIO_FN_SCIF4_RXD, GPIO_FN_SCIF4_TXD, + + /* SCIF5 (PTS) */ + GPIO_FN_SCIF5_SCK, GPIO_FN_SCIF5_RXD, GPIO_FN_SCIF5_TXD, + + /* FSI (PTE/PTU/PTV) */ + GPIO_FN_FSIMCKB, GPIO_FN_FSIMCKA, GPIO_FN_FSIOASD, + GPIO_FN_FSIIABCK, GPIO_FN_FSIIALRCK, GPIO_FN_FSIOABCK, + GPIO_FN_FSIOALRCK, GPIO_FN_CLKAUDIOAO, GPIO_FN_FSIIBSD, + GPIO_FN_FSIOBSD, GPIO_FN_FSIIBBCK, GPIO_FN_FSIIBLRCK, + GPIO_FN_FSIOBBCK, GPIO_FN_FSIOBLRCK, GPIO_FN_CLKAUDIOBO, + GPIO_FN_FSIIASD, + + /* AUD (PTG) */ + GPIO_FN_AUDCK, GPIO_FN_AUDSYNC, GPIO_FN_AUDATA3, + GPIO_FN_AUDATA2, GPIO_FN_AUDATA1, GPIO_FN_AUDATA0, + + /* VIO (PTS) (common?) */ + GPIO_FN_VIO_CKO, + + /* VIO0 (PTH/PTK) */ + GPIO_FN_VIO0_D15, GPIO_FN_VIO0_D14, GPIO_FN_VIO0_D13, GPIO_FN_VIO0_D12, + GPIO_FN_VIO0_D11, GPIO_FN_VIO0_D10, GPIO_FN_VIO0_D9, GPIO_FN_VIO0_D8, + GPIO_FN_VIO0_D7, GPIO_FN_VIO0_D6, GPIO_FN_VIO0_D5, GPIO_FN_VIO0_D4, + GPIO_FN_VIO0_D3, GPIO_FN_VIO0_D2, GPIO_FN_VIO0_D1, GPIO_FN_VIO0_D0, + GPIO_FN_VIO0_VD, GPIO_FN_VIO0_CLK, + GPIO_FN_VIO0_FLD, GPIO_FN_VIO0_HD, + + /* VIO1 (PTK/PTS) */ + GPIO_FN_VIO1_D7, GPIO_FN_VIO1_D6, GPIO_FN_VIO1_D5, GPIO_FN_VIO1_D4, + GPIO_FN_VIO1_D3, GPIO_FN_VIO1_D2, GPIO_FN_VIO1_D1, GPIO_FN_VIO1_D0, + GPIO_FN_VIO1_FLD, GPIO_FN_VIO1_HD, GPIO_FN_VIO1_VD, GPIO_FN_VIO1_CLK, + + /* Eth (PTL/PTN/PTX) */ + GPIO_FN_RMII_RXD0, GPIO_FN_RMII_RXD1, + GPIO_FN_RMII_TXD0, GPIO_FN_RMII_TXD1, + GPIO_FN_RMII_REF_CLK, GPIO_FN_RMII_TX_EN, + GPIO_FN_RMII_RX_ER, GPIO_FN_RMII_CRS_DV, + GPIO_FN_LNKSTA, GPIO_FN_MDIO, + GPIO_FN_MDC, + + /* System (PTJ) */ + GPIO_FN_PDSTATUS, GPIO_FN_STATUS2, GPIO_FN_STATUS0, + + /* VOU (PTL/PTM/PTN*/ + GPIO_FN_DV_D15, GPIO_FN_DV_D14, GPIO_FN_DV_D13, GPIO_FN_DV_D12, + GPIO_FN_DV_D11, GPIO_FN_DV_D10, GPIO_FN_DV_D9, GPIO_FN_DV_D8, + GPIO_FN_DV_D7, GPIO_FN_DV_D6, GPIO_FN_DV_D5, GPIO_FN_DV_D4, + GPIO_FN_DV_D3, GPIO_FN_DV_D2, GPIO_FN_DV_D1, GPIO_FN_DV_D0, + GPIO_FN_DV_CLKI, GPIO_FN_DV_CLK, GPIO_FN_DV_VSYNC, GPIO_FN_DV_HSYNC, + + /* MSIOF0 (PTL/PTM) */ + GPIO_FN_MSIOF0_RXD, GPIO_FN_MSIOF0_TXD, + GPIO_FN_MSIOF0_MCK, GPIO_FN_MSIOF0_TSCK, + GPIO_FN_MSIOF0_SS1, GPIO_FN_MSIOF0_SS2, + GPIO_FN_MSIOF0_TSYNC, GPIO_FN_MSIOF0_RSCK, + GPIO_FN_MSIOF0_RSYNC, + + /* MSIOF1 (PTV) */ + GPIO_FN_MSIOF1_RXD, GPIO_FN_MSIOF1_TXD, + GPIO_FN_MSIOF1_MCK, GPIO_FN_MSIOF1_TSCK, + GPIO_FN_MSIOF1_SS1, GPIO_FN_MSIOF1_SS2, + GPIO_FN_MSIOF1_TSYNC, GPIO_FN_MSIOF1_RSCK, + GPIO_FN_MSIOF1_RSYNC, + + /* DMAC (PTU/PTX) */ + GPIO_FN_DMAC_DACK0, GPIO_FN_DMAC_DREQ0, + GPIO_FN_DMAC_DACK1, GPIO_FN_DMAC_DREQ1, + + /* SDHI0 (PTY) */ + GPIO_FN_SDHI0CD, GPIO_FN_SDHI0WP, GPIO_FN_SDHI0CMD, GPIO_FN_SDHI0CLK, + GPIO_FN_SDHI0D3, GPIO_FN_SDHI0D2, GPIO_FN_SDHI0D1, GPIO_FN_SDHI0D0, + + /* SDHI1 (PTW) */ + GPIO_FN_SDHI1CD, GPIO_FN_SDHI1WP, GPIO_FN_SDHI1CMD, GPIO_FN_SDHI1CLK, + GPIO_FN_SDHI1D3, GPIO_FN_SDHI1D2, GPIO_FN_SDHI1D1, GPIO_FN_SDHI1D0, + + /* MMC (PTW/PTX)*/ + GPIO_FN_MMC_D7, GPIO_FN_MMC_D6, GPIO_FN_MMC_D5, GPIO_FN_MMC_D4, + GPIO_FN_MMC_D3, GPIO_FN_MMC_D2, GPIO_FN_MMC_D1, GPIO_FN_MMC_D0, + GPIO_FN_MMC_CLK, GPIO_FN_MMC_CMD, + + /* IrDA (PTX) */ + GPIO_FN_IRDA_OUT, GPIO_FN_IRDA_IN, + + /* TSIF (PTX) */ + GPIO_FN_TSIF_TS0_SDAT, GPIO_FN_TSIF_TS0_SCK, + GPIO_FN_TSIF_TS0_SDEN, GPIO_FN_TSIF_TS0_SPSYNC, + + /* IRQ (PTZ) */ + GPIO_FN_INTC_IRQ7, GPIO_FN_INTC_IRQ6, GPIO_FN_INTC_IRQ5, + GPIO_FN_INTC_IRQ4, GPIO_FN_INTC_IRQ3, GPIO_FN_INTC_IRQ2, + GPIO_FN_INTC_IRQ1, GPIO_FN_INTC_IRQ0, +}; + +#endif /* __ASM_SH7724_H__ */ diff --git a/arch/sh/kernel/cpu/sh4/probe.c b/arch/sh/kernel/cpu/sh4/probe.c index 91e3677..973ff83 100644 --- a/arch/sh/kernel/cpu/sh4/probe.c +++ b/arch/sh/kernel/cpu/sh4/probe.c @@ -156,6 +156,12 @@ int __init detect_cpu_and_cache_system(void) break; } break; + case 0x300b: + boot_cpu_data.type = CPU_SH7724; + boot_cpu_data.icache.ways = 4; + boot_cpu_data.dcache.ways = 4; + boot_cpu_data.flags |= CPU_HAS_LLSC | CPU_HAS_FPU; + break; case 0x4000: /* 1st cut */ case 0x4001: /* 2nd cut */ boot_cpu_data.type = CPU_SHX3; diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile index 1a92361..afd6fba4 100644 --- a/arch/sh/kernel/cpu/sh4a/Makefile +++ b/arch/sh/kernel/cpu/sh4a/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_CPU_SUBTYPE_SH7786) += setup-sh7786.o obj-$(CONFIG_CPU_SUBTYPE_SH7343) += setup-sh7343.o obj-$(CONFIG_CPU_SUBTYPE_SH7722) += setup-sh7722.o obj-$(CONFIG_CPU_SUBTYPE_SH7723) += setup-sh7723.o +obj-$(CONFIG_CPU_SUBTYPE_SH7724) += setup-sh7724.o obj-$(CONFIG_CPU_SUBTYPE_SH7366) += setup-sh7366.o obj-$(CONFIG_CPU_SUBTYPE_SHX3) += setup-shx3.o @@ -26,12 +27,14 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7786) := clock-sh7786.o clock-$(CONFIG_CPU_SUBTYPE_SH7343) := clock-sh7722.o clock-$(CONFIG_CPU_SUBTYPE_SH7722) := clock-sh7722.o clock-$(CONFIG_CPU_SUBTYPE_SH7723) := clock-sh7722.o +clock-$(CONFIG_CPU_SUBTYPE_SH7724) := clock-sh7722.o clock-$(CONFIG_CPU_SUBTYPE_SH7366) := clock-sh7722.o clock-$(CONFIG_CPU_SUBTYPE_SHX3) := clock-shx3.o # Pinmux setup pinmux-$(CONFIG_CPU_SUBTYPE_SH7722) := pinmux-sh7722.o pinmux-$(CONFIG_CPU_SUBTYPE_SH7723) := pinmux-sh7723.o +pinmux-$(CONFIG_CPU_SUBTYPE_SH7724) := pinmux-sh7724.o pinmux-$(CONFIG_CPU_SUBTYPE_SH7785) := pinmux-sh7785.o pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c index 0e174af..1ccdfc5 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c @@ -130,6 +130,12 @@ static void adjust_clocks(int originate, int *l, unsigned long v[], * is quite simple.. */ +#if defined(CONFIG_CPU_SUBTYPE_SH7724) +#define STCPLL(frqcr) ((((frqcr >> 24) & 0x3f) + 1) * 2) +#else +#define STCPLL(frqcr) (((frqcr >> 24) & 0x1f) + 1) +#endif + /* * Instead of having two separate multipliers/divisors set, like this: * @@ -139,13 +145,17 @@ static void adjust_clocks(int originate, int *l, unsigned long v[], * I created the divisors2 array, which is used to calculate rate like * rate = parent * 2 / divisors2[ divisor ]; */ +#if defined(CONFIG_CPU_SUBTYPE_SH7724) +static int divisors2[] = { 4, 1, 8, 12, 16, 24, 32, 1, 48, 64, 72, 96, 1, 144 }; +#else static int divisors2[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 32, 40 }; +#endif static void master_clk_recalc(struct clk *clk) { unsigned frqcr = ctrl_inl(FRQCR); - clk->rate = CONFIG_SH_PCLK_FREQ * (((frqcr >> 24) & 0x1f) + 1); + clk->rate = CONFIG_SH_PCLK_FREQ * STCPLL(frqcr); } static void master_clk_init(struct clk *clk) @@ -161,13 +171,30 @@ static void module_clk_recalc(struct clk *clk) { unsigned long frqcr = ctrl_inl(FRQCR); - clk->rate = clk->parent->rate / (((frqcr >> 24) & 0x1f) + 1); + clk->rate = clk->parent->rate / STCPLL(frqcr); } +#if defined(CONFIG_CPU_SUBTYPE_SH7724) +#define MASTERDIVS { 12, 16, 24, 30, 32, 36, 48 } +#define STCMASK 0x3f +#define DIVCALC(div) (div/2-1) +#define FRQCRKICK 0x80000000 +#elif defined(CONFIG_CPU_SUBTYPE_SH7723) +#define MASTERDIVS { 6, 8, 12, 16 } +#define STCMASK 0x1f +#define DIVCALC(div) (div-1) +#define FRQCRKICK 0x00000000 +#else +#define MASTERDIVS { 2, 3, 4, 6, 8, 16 } +#define STCMASK 0x1f +#define DIVCALC(div) (div-1) +#define FRQCRKICK 0x00000000 +#endif + static int master_clk_setrate(struct clk *clk, unsigned long rate, int id) { int div = rate / clk->rate; - int master_divs[] = { 2, 3, 4, 6, 8, 16 }; + int master_divs[] = MASTERDIVS; int index; unsigned long frqcr; @@ -180,8 +207,9 @@ static int master_clk_setrate(struct clk *clk, unsigned long rate, int id) div = master_divs[index - 1]; frqcr = ctrl_inl(FRQCR); - frqcr &= ~(0xF << 24); - frqcr |= ( (div-1) << 24); + frqcr &= ~(STCMASK << 24); + frqcr |= (DIVCALC(div) << 24); + frqcr |= FRQCRKICK; ctrl_outl(frqcr, FRQCR); return 0; @@ -377,6 +405,7 @@ static int sh7722_frqcr_set_rate(struct clk *clk, unsigned long rate, /* clear FRQCR bits */ frqcr &= ~(ctx.mask << ctx.shift); frqcr |= div << ctx.shift; + frqcr |= FRQCRKICK; /* ...and perform actual change */ ctrl_outl(frqcr, FRQCR); @@ -542,8 +571,8 @@ static struct clk sh7722_r_clock = { .flags = CLK_RATE_PROPAGATES, }; -#ifndef CONFIG_CPU_SUBTYPE_SH7343 - +#if !defined(CONFIG_CPU_SUBTYPE_SH7343) &&\ + !defined(CONFIG_CPU_SUBTYPE_SH7724) /* * these three clocks - SIU A, SIU B, IrDA - share the same clk_ops * methods of clk_ops determine which register they should access by @@ -560,15 +589,16 @@ static struct clk sh7722_siu_b_clock = { .arch_flags = SCLKBCR, .ops = &sh7722_siu_clk_ops, }; +#endif /* CONFIG_CPU_SUBTYPE_SH7343, SH7724 */ -#if defined(CONFIG_CPU_SUBTYPE_SH7722) +#if defined(CONFIG_CPU_SUBTYPE_SH7722) ||\ + defined(CONFIG_CPU_SUBTYPE_SH7724) static struct clk sh7722_irda_clock = { .name = "irda_clk", .arch_flags = IrDACLKCR, .ops = &sh7722_siu_clk_ops, }; #endif -#endif /* CONFIG_CPU_SUBTYPE_SH7343 */ static struct clk sh7722_video_clock = { .name = "video_clk", @@ -715,6 +745,61 @@ static struct clk sh7722_mstpcr_clocks[] = { MSTPCR("vpu0", "bus_clk", 2, 1), MSTPCR("lcdc0", "bus_clk", 2, 0), #endif +#if defined(CONFIG_CPU_SUBTYPE_SH7724) + /* See Datasheet : Overview -> Block Diagram */ + MSTPCR("tlb0", "cpu_clk", 0, 31), + MSTPCR("ic0", "cpu_clk", 0, 30), + MSTPCR("oc0", "cpu_clk", 0, 29), + MSTPCR("rs0", "bus_clk", 0, 28), + MSTPCR("ilmem0", "cpu_clk", 0, 27), + MSTPCR("l2c0", "sh_clk", 0, 26), + MSTPCR("fpu0", "cpu_clk", 0, 24), + MSTPCR("intc0", "peripheral_clk", 0, 22), + MSTPCR("dmac0", "bus_clk", 0, 21), + MSTPCR("sh0", "sh_clk", 0, 20), + MSTPCR("hudi0", "peripheral_clk", 0, 19), + MSTPCR("ubc0", "cpu_clk", 0, 17), + MSTPCR("tmu0", "peripheral_clk", 0, 15), + MSTPCR("cmt0", "r_clk", 0, 14), + MSTPCR("rwdt0", "r_clk", 0, 13), + MSTPCR("dmac1", "bus_clk", 0, 12), + MSTPCR("tmu1", "peripheral_clk", 0, 10), + MSTPCR("scif0", "peripheral_clk", 0, 9), + MSTPCR("scif1", "peripheral_clk", 0, 8), + MSTPCR("scif2", "peripheral_clk", 0, 7), + MSTPCR("scif3", "bus_clk", 0, 6), + MSTPCR("scif4", "bus_clk", 0, 5), + MSTPCR("scif5", "bus_clk", 0, 4), + MSTPCR("msiof0", "bus_clk", 0, 2), + MSTPCR("msiof1", "bus_clk", 0, 1), + MSTPCR("keysc0", "r_clk", 1, 12), + MSTPCR("rtc0", "r_clk", 1, 11), + MSTPCR("i2c0", "peripheral_clk", 1, 9), + MSTPCR("i2c1", "peripheral_clk", 1, 8), + MSTPCR("mmc0", "bus_clk", 2, 29), + MSTPCR("eth0", "bus_clk", 2, 28), + MSTPCR("atapi0", "bus_clk", 2, 26), + MSTPCR("tpu0", "bus_clk", 2, 25), + MSTPCR("irda0", "peripheral_clk", 2, 24), + MSTPCR("tsif0", "bus_clk", 2, 22), + MSTPCR("usb1", "bus_clk", 2, 21), + MSTPCR("usb0", "bus_clk", 2, 20), + MSTPCR("2dg0", "bus_clk", 2, 19), + MSTPCR("sdhi0", "bus_clk", 2, 18), + MSTPCR("sdhi1", "bus_clk", 2, 17), + MSTPCR("veu1", "bus_clk", 2, 15), + MSTPCR("ceu1", "bus_clk", 2, 13), + MSTPCR("beu1", "bus_clk", 2, 12), + MSTPCR("2ddmac0", "sh_clk", 2, 10), + MSTPCR("spu0", "bus_clk", 2, 9), + MSTPCR("jpu0", "bus_clk", 2, 6), + MSTPCR("vou0", "bus_clk", 2, 5), + MSTPCR("beu0", "bus_clk", 2, 4), + MSTPCR("ceu0", "bus_clk", 2, 3), + MSTPCR("veu0", "bus_clk", 2, 2), + MSTPCR("vpu0", "bus_clk", 2, 1), + MSTPCR("lcdc0", "bus_clk", 2, 0), +#endif #if defined(CONFIG_CPU_SUBTYPE_SH7343) MSTPCR("uram0", "umem_clk", 0, 28), MSTPCR("xymem0", "bus_clk", 0, 26), @@ -786,12 +871,15 @@ static struct clk *sh7722_clocks[] = { &sh7722_sh_clock, &sh7722_peripheral_clock, &sh7722_sdram_clock, -#ifndef CONFIG_CPU_SUBTYPE_SH7343 +#if !defined(CONFIG_CPU_SUBTYPE_SH7343) &&\ + !defined(CONFIG_CPU_SUBTYPE_SH7724) &sh7722_siu_a_clock, &sh7722_siu_b_clock, -#if defined(CONFIG_CPU_SUBTYPE_SH7722) - &sh7722_irda_clock, #endif +/* 7724 should support FSI clock */ +#if defined(CONFIG_CPU_SUBTYPE_SH7722) || \ + defined(CONFIG_CPU_SUBTYPE_SH7724) + &sh7722_irda_clock, #endif &sh7722_video_clock, }; diff --git a/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c b/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c new file mode 100644 index 0000000..1af0f95 --- /dev/null +++ b/arch/sh/kernel/cpu/sh4a/pinmux-sh7724.c @@ -0,0 +1,2230 @@ +/* + * SH7724 Pinmux + * + * Copyright (C) 2009 Renesas Solutions Corp. + * + * Kuninori Morimoto + * + * Based on SH7723 Pinmux + * Copyright (C) 2008 Magnus Damm + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include + +enum { + PINMUX_RESERVED = 0, + + PINMUX_DATA_BEGIN, + PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA, + PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA, + PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA, + PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA, + PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA, + PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA, + PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA, + PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA, + PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA, + PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA, + PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA, + PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA, + PTG5_DATA, PTG4_DATA, + PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA, + PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA, + PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA, + PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, + PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA, + PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA, + PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA, + PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA, + PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA, + PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA, + PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA, + PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA, + PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA, + PTQ7_DATA, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA, + PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA, + PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA, + PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA, + PTS6_DATA, PTS5_DATA, PTS4_DATA, + PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA, + PTT7_DATA, PTT6_DATA, PTT5_DATA, PTT4_DATA, + PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA, + PTU7_DATA, PTU6_DATA, PTU5_DATA, PTU4_DATA, + PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA, + PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA, + PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA, + PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA, + PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA, + PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA, + PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA, + PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA, + PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA, + PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA, + PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA, + PINMUX_DATA_END, + + PINMUX_INPUT_BEGIN, + PTA7_IN, PTA6_IN, PTA5_IN, PTA4_IN, + PTA3_IN, PTA2_IN, PTA1_IN, PTA0_IN, + PTB7_IN, PTB6_IN, PTB5_IN, PTB4_IN, + PTB3_IN, PTB2_IN, PTB1_IN, PTB0_IN, + PTC7_IN, PTC6_IN, PTC5_IN, PTC4_IN, + PTC3_IN, PTC2_IN, PTC1_IN, PTC0_IN, + PTD7_IN, PTD6_IN, PTD5_IN, PTD4_IN, + PTD3_IN, PTD2_IN, PTD1_IN, PTD0_IN, + PTE7_IN, PTE6_IN, PTE5_IN, PTE4_IN, + PTE3_IN, PTE2_IN, PTE1_IN, PTE0_IN, + PTF7_IN, PTF6_IN, PTF5_IN, PTF4_IN, + PTF3_IN, PTF2_IN, PTF1_IN, PTF0_IN, + PTH7_IN, PTH6_IN, PTH5_IN, PTH4_IN, + PTH3_IN, PTH2_IN, PTH1_IN, PTH0_IN, + PTJ3_IN, PTJ2_IN, PTJ1_IN, PTJ0_IN, + PTK7_IN, PTK6_IN, PTK5_IN, PTK4_IN, + PTK3_IN, PTK2_IN, PTK1_IN, PTK0_IN, + PTL7_IN, PTL6_IN, PTL5_IN, PTL4_IN, + PTL3_IN, PTL2_IN, PTL1_IN, PTL0_IN, + PTM7_IN, PTM6_IN, PTM5_IN, PTM4_IN, + PTM3_IN, PTM2_IN, PTM1_IN, PTM0_IN, + PTN7_IN, PTN6_IN, PTN5_IN, PTN4_IN, + PTN3_IN, PTN2_IN, PTN1_IN, PTN0_IN, + PTQ7_IN, PTQ6_IN, PTQ5_IN, PTQ4_IN, + PTQ3_IN, PTQ2_IN, PTQ1_IN, PTQ0_IN, + PTR7_IN, PTR6_IN, PTR5_IN, PTR4_IN, + PTR3_IN, PTR2_IN, PTR1_IN, PTR0_IN, + PTS6_IN, PTS5_IN, PTS4_IN, + PTS3_IN, PTS2_IN, PTS1_IN, PTS0_IN, + PTT7_IN, PTT6_IN, PTT5_IN, PTT4_IN, + PTT3_IN, PTT2_IN, PTT1_IN, PTT0_IN, + PTU7_IN, PTU6_IN, PTU5_IN, PTU4_IN, + PTU3_IN, PTU2_IN, PTU1_IN, PTU0_IN, + PTV7_IN, PTV6_IN, PTV5_IN, PTV4_IN, + PTV3_IN, PTV2_IN, PTV1_IN, PTV0_IN, + PTW7_IN, PTW6_IN, PTW5_IN, PTW4_IN, + PTW3_IN, PTW2_IN, PTW1_IN, PTW0_IN, + PTX7_IN, PTX6_IN, PTX5_IN, PTX4_IN, + PTX3_IN, PTX2_IN, PTX1_IN, PTX0_IN, + PTY7_IN, PTY6_IN, PTY5_IN, PTY4_IN, + PTY3_IN, PTY2_IN, PTY1_IN, PTY0_IN, + PTZ7_IN, PTZ6_IN, PTZ5_IN, PTZ4_IN, + PTZ3_IN, PTZ2_IN, PTZ1_IN, PTZ0_IN, + PINMUX_INPUT_END, + + PINMUX_INPUT_PULLUP_BEGIN, + PTA7_IN_PU, PTA6_IN_PU, PTA5_IN_PU, PTA4_IN_PU, + PTA3_IN_PU, PTA2_IN_PU, PTA1_IN_PU, PTA0_IN_PU, + PTB7_IN_PU, PTB6_IN_PU, PTB5_IN_PU, PTB4_IN_PU, + PTB3_IN_PU, PTB2_IN_PU, PTB1_IN_PU, PTB0_IN_PU, + PTC7_IN_PU, PTC6_IN_PU, PTC5_IN_PU, PTC4_IN_PU, + PTC3_IN_PU, PTC2_IN_PU, PTC1_IN_PU, PTC0_IN_PU, + PTD7_IN_PU, PTD6_IN_PU, PTD5_IN_PU, PTD4_IN_PU, + PTD3_IN_PU, PTD2_IN_PU, PTD1_IN_PU, PTD0_IN_PU, + PTE7_IN_PU, PTE6_IN_PU, PTE5_IN_PU, PTE4_IN_PU, + PTE3_IN_PU, PTE2_IN_PU, PTE1_IN_PU, PTE0_IN_PU, + PTF7_IN_PU, PTF6_IN_PU, PTF5_IN_PU, PTF4_IN_PU, + PTF3_IN_PU, PTF2_IN_PU, PTF1_IN_PU, PTF0_IN_PU, + PTH7_IN_PU, PTH6_IN_PU, PTH5_IN_PU, PTH4_IN_PU, + PTH3_IN_PU, PTH2_IN_PU, PTH1_IN_PU, PTH0_IN_PU, + PTJ3_IN_PU, PTJ2_IN_PU, PTJ1_IN_PU, PTJ0_IN_PU, + PTK7_IN_PU, PTK6_IN_PU, PTK5_IN_PU, PTK4_IN_PU, + PTK3_IN_PU, PTK2_IN_PU, PTK1_IN_PU, PTK0_IN_PU, + PTL7_IN_PU, PTL6_IN_PU, PTL5_IN_PU, PTL4_IN_PU, + PTL3_IN_PU, PTL2_IN_PU, PTL1_IN_PU, PTL0_IN_PU, + PTM7_IN_PU, PTM6_IN_PU, PTM5_IN_PU, PTM4_IN_PU, + PTM3_IN_PU, PTM2_IN_PU, PTM1_IN_PU, PTM0_IN_PU, + PTN7_IN_PU, PTN6_IN_PU, PTN5_IN_PU, PTN4_IN_PU, + PTN3_IN_PU, PTN2_IN_PU, PTN1_IN_PU, PTN0_IN_PU, + PTQ7_IN_PU, PTQ6_IN_PU, PTQ5_IN_PU, PTQ4_IN_PU, + PTQ3_IN_PU, PTQ2_IN_PU, PTQ1_IN_PU, PTQ0_IN_PU, + PTR7_IN_PU, PTR6_IN_PU, PTR5_IN_PU, PTR4_IN_PU, + PTR3_IN_PU, PTR2_IN_PU, PTR1_IN_PU, PTR0_IN_PU, + PTS6_IN_PU, PTS5_IN_PU, PTS4_IN_PU, + PTS3_IN_PU, PTS2_IN_PU, PTS1_IN_PU, PTS0_IN_PU, + PTT7_IN_PU, PTT6_IN_PU, PTT5_IN_PU, PTT4_IN_PU, + PTT3_IN_PU, PTT2_IN_PU, PTT1_IN_PU, PTT0_IN_PU, + PTU7_IN_PU, PTU6_IN_PU, PTU5_IN_PU, PTU4_IN_PU, + PTU3_IN_PU, PTU2_IN_PU, PTU1_IN_PU, PTU0_IN_PU, + PTV7_IN_PU, PTV6_IN_PU, PTV5_IN_PU, PTV4_IN_PU, + PTV3_IN_PU, PTV2_IN_PU, PTV1_IN_PU, PTV0_IN_PU, + PTW7_IN_PU, PTW6_IN_PU, PTW5_IN_PU, PTW4_IN_PU, + PTW3_IN_PU, PTW2_IN_PU, PTW1_IN_PU, PTW0_IN_PU, + PTX7_IN_PU, PTX6_IN_PU, PTX5_IN_PU, PTX4_IN_PU, + PTX3_IN_PU, PTX2_IN_PU, PTX1_IN_PU, PTX0_IN_PU, + PTY7_IN_PU, PTY6_IN_PU, PTY5_IN_PU, PTY4_IN_PU, + PTY3_IN_PU, PTY2_IN_PU, PTY1_IN_PU, PTY0_IN_PU, + PTZ7_IN_PU, PTZ6_IN_PU, PTZ5_IN_PU, PTZ4_IN_PU, + PTZ3_IN_PU, PTZ2_IN_PU, PTZ1_IN_PU, PTZ0_IN_PU, + PINMUX_INPUT_PULLUP_END, + + PINMUX_OUTPUT_BEGIN, + PTA7_OUT, PTA6_OUT, PTA5_OUT, PTA4_OUT, + PTA3_OUT, PTA2_OUT, PTA1_OUT, PTA0_OUT, + PTB7_OUT, PTB6_OUT, PTB5_OUT, PTB4_OUT, + PTB3_OUT, PTB2_OUT, PTB1_OUT, PTB0_OUT, + PTC7_OUT, PTC6_OUT, PTC5_OUT, PTC4_OUT, + PTC3_OUT, PTC2_OUT, PTC1_OUT, PTC0_OUT, + PTD7_OUT, PTD6_OUT, PTD5_OUT, PTD4_OUT, + PTD3_OUT, PTD2_OUT, PTD1_OUT, PTD0_OUT, + PTE7_OUT, PTE6_OUT, PTE5_OUT, PTE4_OUT, + PTE3_OUT, PTE2_OUT, PTE1_OUT, PTE0_OUT, + PTF7_OUT, PTF6_OUT, PTF5_OUT, PTF4_OUT, + PTF3_OUT, PTF2_OUT, PTF1_OUT, PTF0_OUT, + PTG5_OUT, PTG4_OUT, + PTG3_OUT, PTG2_OUT, PTG1_OUT, PTG0_OUT, + PTH7_OUT, PTH6_OUT, PTH5_OUT, PTH4_OUT, + PTH3_OUT, PTH2_OUT, PTH1_OUT, PTH0_OUT, + PTJ7_OUT, PTJ6_OUT, PTJ5_OUT, + PTJ3_OUT, PTJ2_OUT, PTJ1_OUT, PTJ0_OUT, + PTK7_OUT, PTK6_OUT, PTK5_OUT, PTK4_OUT, + PTK3_OUT, PTK2_OUT, PTK1_OUT, PTK0_OUT, + PTL7_OUT, PTL6_OUT, PTL5_OUT, PTL4_OUT, + PTL3_OUT, PTL2_OUT, PTL1_OUT, PTL0_OUT, + PTM7_OUT, PTM6_OUT, PTM5_OUT, PTM4_OUT, + PTM3_OUT, PTM2_OUT, PTM1_OUT, PTM0_OUT, + PTN7_OUT, PTN6_OUT, PTN5_OUT, PTN4_OUT, + PTN3_OUT, PTN2_OUT, PTN1_OUT, PTN0_OUT, + PTQ7_OUT, PTQ6_OUT, PTQ5_OUT, PTQ4_OUT, + PTQ3_OUT, PTQ2_OUT, PTQ1_OUT, PTQ0_OUT, + PTR7_OUT, PTR6_OUT, PTR5_OUT, PTR4_OUT, + PTR1_OUT, PTR0_OUT, + PTS6_OUT, PTS5_OUT, PTS4_OUT, + PTS3_OUT, PTS2_OUT, PTS1_OUT, PTS0_OUT, + PTT7_OUT, PTT6_OUT, PTT5_OUT, PTT4_OUT, + PTT3_OUT, PTT2_OUT, PTT1_OUT, PTT0_OUT, + PTU7_OUT, PTU6_OUT, PTU5_OUT, PTU4_OUT, + PTU3_OUT, PTU2_OUT, PTU1_OUT, PTU0_OUT, + PTV7_OUT, PTV6_OUT, PTV5_OUT, PTV4_OUT, + PTV3_OUT, PTV2_OUT, PTV1_OUT, PTV0_OUT, + PTW7_OUT, PTW6_OUT, PTW5_OUT, PTW4_OUT, + PTW3_OUT, PTW2_OUT, PTW1_OUT, PTW0_OUT, + PTX7_OUT, PTX6_OUT, PTX5_OUT, PTX4_OUT, + PTX3_OUT, PTX2_OUT, PTX1_OUT, PTX0_OUT, + PTY7_OUT, PTY6_OUT, PTY5_OUT, PTY4_OUT, + PTY3_OUT, PTY2_OUT, PTY1_OUT, PTY0_OUT, + PTZ7_OUT, PTZ6_OUT, PTZ5_OUT, PTZ4_OUT, + PTZ3_OUT, PTZ2_OUT, PTZ1_OUT, PTZ0_OUT, + PINMUX_OUTPUT_END, + + PINMUX_FUNCTION_BEGIN, + PTA7_FN, PTA6_FN, PTA5_FN, PTA4_FN, + PTA3_FN, PTA2_FN, PTA1_FN, PTA0_FN, + PTB7_FN, PTB6_FN, PTB5_FN, PTB4_FN, + PTB3_FN, PTB2_FN, PTB1_FN, PTB0_FN, + PTC7_FN, PTC6_FN, PTC5_FN, PTC4_FN, + PTC3_FN, PTC2_FN, PTC1_FN, PTC0_FN, + PTD7_FN, PTD6_FN, PTD5_FN, PTD4_FN, + PTD3_FN, PTD2_FN, PTD1_FN, PTD0_FN, + PTE7_FN, PTE6_FN, PTE5_FN, PTE4_FN, + PTE3_FN, PTE2_FN, PTE1_FN, PTE0_FN, + PTF7_FN, PTF6_FN, PTF5_FN, PTF4_FN, + PTF3_FN, PTF2_FN, PTF1_FN, PTF0_FN, + PTG5_FN, PTG4_FN, + PTG3_FN, PTG2_FN, PTG1_FN, PTG0_FN, + PTH7_FN, PTH6_FN, PTH5_FN, PTH4_FN, + PTH3_FN, PTH2_FN, PTH1_FN, PTH0_FN, + PTJ7_FN, PTJ6_FN, PTJ5_FN, + PTJ3_FN, PTJ2_FN, PTJ1_FN, PTJ0_FN, + PTK7_FN, PTK6_FN, PTK5_FN, PTK4_FN, + PTK3_FN, PTK2_FN, PTK1_FN, PTK0_FN, + PTL7_FN, PTL6_FN, PTL5_FN, PTL4_FN, + PTL3_FN, PTL2_FN, PTL1_FN, PTL0_FN, + PTM7_FN, PTM6_FN, PTM5_FN, PTM4_FN, + PTM3_FN, PTM2_FN, PTM1_FN, PTM0_FN, + PTN7_FN, PTN6_FN, PTN5_FN, PTN4_FN, + PTN3_FN, PTN2_FN, PTN1_FN, PTN0_FN, + PTQ7_FN, PTQ6_FN, PTQ5_FN, PTQ4_FN, + PTQ3_FN, PTQ2_FN, PTQ1_FN, PTQ0_FN, + PTR7_FN, PTR6_FN, PTR5_FN, PTR4_FN, + PTR3_FN, PTR2_FN, PTR1_FN, PTR0_FN, + PTS6_FN, PTS5_FN, PTS4_FN, + PTS3_FN, PTS2_FN, PTS1_FN, PTS0_FN, + PTT7_FN, PTT6_FN, PTT5_FN, PTT4_FN, + PTT3_FN, PTT2_FN, PTT1_FN, PTT0_FN, + PTU7_FN, PTU6_FN, PTU5_FN, PTU4_FN, + PTU3_FN, PTU2_FN, PTU1_FN, PTU0_FN, + PTV7_FN, PTV6_FN, PTV5_FN, PTV4_FN, + PTV3_FN, PTV2_FN, PTV1_FN, PTV0_FN, + PTW7_FN, PTW6_FN, PTW5_FN, PTW4_FN, + PTW3_FN, PTW2_FN, PTW1_FN, PTW0_FN, + PTX7_FN, PTX6_FN, PTX5_FN, PTX4_FN, + PTX3_FN, PTX2_FN, PTX1_FN, PTX0_FN, + PTY7_FN, PTY6_FN, PTY5_FN, PTY4_FN, + PTY3_FN, PTY2_FN, PTY1_FN, PTY0_FN, + PTZ7_FN, PTZ6_FN, PTZ5_FN, PTZ4_FN, + PTZ3_FN, PTZ2_FN, PTZ1_FN, PTZ0_FN, + + + PSA15_0, PSA15_1, + PSA14_0, PSA14_1, + PSA13_0, PSA13_1, + PSA12_0, PSA12_1, + PSA10_0, PSA10_1, + PSA9_0, PSA9_1, + PSA8_0, PSA8_1, + PSA7_0, PSA7_1, + PSA6_0, PSA6_1, + PSA5_0, PSA5_1, + PSA3_0, PSA3_1, + PSA2_0, PSA2_1, + PSA1_0, PSA1_1, + PSA0_0, PSA0_1, + + PSB14_0, PSB14_1, + PSB13_0, PSB13_1, + PSB12_0, PSB12_1, + PSB11_0, PSB11_1, + PSB10_0, PSB10_1, + PSB9_0, PSB9_1, + PSB8_0, PSB8_1, + PSB7_0, PSB7_1, + PSB6_0, PSB6_1, + PSB5_0, PSB5_1, + PSB4_0, PSB4_1, + PSB3_0, PSB3_1, + PSB2_0, PSB2_1, + PSB1_0, PSB1_1, + PSB0_0, PSB0_1, + + PSC15_0, PSC15_1, + PSC14_0, PSC14_1, + PSC13_0, PSC13_1, + PSC12_0, PSC12_1, + PSC11_0, PSC11_1, + PSC10_0, PSC10_1, + PSC9_0, PSC9_1, + PSC8_0, PSC8_1, + PSC7_0, PSC7_1, + PSC6_0, PSC6_1, + PSC5_0, PSC5_1, + PSC4_0, PSC4_1, + PSC2_0, PSC2_1, + PSC1_0, PSC1_1, + PSC0_0, PSC0_1, + + PSD15_0, PSD15_1, + PSD14_0, PSD14_1, + PSD13_0, PSD13_1, + PSD12_0, PSD12_1, + PSD11_0, PSD11_1, + PSD10_0, PSD10_1, + PSD9_0, PSD9_1, + PSD8_0, PSD8_1, + PSD7_0, PSD7_1, + PSD6_0, PSD6_1, + PSD5_0, PSD5_1, + PSD4_0, PSD4_1, + PSD3_0, PSD3_1, + PSD2_0, PSD2_1, + PSD1_0, PSD1_1, + PSD0_0, PSD0_1, + + PSE15_0, PSE15_1, + PSE14_0, PSE14_1, + PSE13_0, PSE13_1, + PSE12_0, PSE12_1, + PSE11_0, PSE11_1, + PSE10_0, PSE10_1, + PSE9_0, PSE9_1, + PSE8_0, PSE8_1, + PSE7_0, PSE7_1, + PSE6_0, PSE6_1, + PSE5_0, PSE5_1, + PSE4_0, PSE4_1, + PSE3_0, PSE3_1, + PSE2_0, PSE2_1, + PSE1_0, PSE1_1, + PSE0_0, PSE0_1, + PINMUX_FUNCTION_END, + + PINMUX_MARK_BEGIN, + /*PTA*/ + D23_MARK, KEYOUT2_MARK, IDED15_MARK, + D22_MARK, KEYOUT1_MARK, IDED14_MARK, + D21_MARK, KEYOUT0_MARK, IDED13_MARK, + D20_MARK, KEYIN4_MARK, IDED12_MARK, + D19_MARK, KEYIN3_MARK, IDED11_MARK, + D18_MARK, KEYIN2_MARK, IDED10_MARK, + D17_MARK, KEYIN1_MARK, IDED9_MARK, + D16_MARK, KEYIN0_MARK, IDED8_MARK, + + /*PTB*/ + D31_MARK, TPUTO1_MARK, IDEA1_MARK, + D30_MARK, TPUTO0_MARK, IDEA0_MARK, + D29_MARK, IODREQ_MARK, + D28_MARK, IDECS0_MARK, + D27_MARK, IDECS1_MARK, + D26_MARK, KEYOUT5_IN5_MARK, IDEIORD_MARK, + D25_MARK, KEYOUT4_IN6_MARK, IDEIOWR_MARK, + D24_MARK, KEYOUT3_MARK, IDEINT_MARK, + + /*PTC*/ + LCDD7_MARK, + LCDD6_MARK, + LCDD5_MARK, + LCDD4_MARK, + LCDD3_MARK, + LCDD2_MARK, + LCDD1_MARK, + LCDD0_MARK, + + /*PTD*/ + LCDD15_MARK, + LCDD14_MARK, + LCDD13_MARK, + LCDD12_MARK, + LCDD11_MARK, + LCDD10_MARK, + LCDD9_MARK, + LCDD8_MARK, + + /*PTE*/ + FSIMCKB_MARK, + FSIMCKA_MARK, + LCDD21_MARK, SCIF2_L_TXD_MARK, + LCDD20_MARK, SCIF4_SCK_MARK, + LCDD19_MARK, SCIF4_RXD_MARK, + LCDD18_MARK, SCIF4_TXD_MARK, + LCDD17_MARK, + LCDD16_MARK, + + /*PTF*/ + LCDVSYN_MARK, + LCDDISP_MARK, LCDRS_MARK, + LCDHSYN_MARK, LCDCS_MARK, + LCDDON_MARK, + LCDDCK_MARK, LCDWR_MARK, + LCDVEPWC_MARK, SCIF0_TXD_MARK, + LCDD23_MARK, SCIF2_L_SCK_MARK, + LCDD22_MARK, SCIF2_L_RXD_MARK, + + /*PTG*/ + AUDCK_MARK, + AUDSYNC_MARK, + AUDATA3_MARK, + AUDATA2_MARK, + AUDATA1_MARK, + AUDATA0_MARK, + + /*PTH*/ + VIO0_VD_MARK, + VIO0_CLK_MARK, + VIO0_D7_MARK, + VIO0_D6_MARK, + VIO0_D5_MARK, + VIO0_D4_MARK, + VIO0_D3_MARK, + VIO0_D2_MARK, + + /*PTJ*/ + PDSTATUS_MARK, + STATUS2_MARK, + STATUS0_MARK, + A25_MARK, BS_MARK, + A24_MARK, + A23_MARK, + A22_MARK, + + /*PTK*/ + VIO1_D5_MARK, VIO0_D13_MARK, IDED5_MARK, + VIO1_D4_MARK, VIO0_D12_MARK, IDED4_MARK, + VIO1_D3_MARK, VIO0_D11_MARK, IDED3_MARK, + VIO1_D2_MARK, VIO0_D10_MARK, IDED2_MARK, + VIO1_D1_MARK, VIO0_D9_MARK, IDED1_MARK, + VIO1_D0_MARK, VIO0_D8_MARK, IDED0_MARK, + VIO0_FLD_MARK, + VIO0_HD_MARK, + + /*PTL*/ + DV_D5_MARK, SCIF3_V_SCK_MARK, RMII_RXD0_MARK, + DV_D4_MARK, SCIF3_V_RXD_MARK, RMII_RXD1_MARK, + DV_D3_MARK, SCIF3_V_TXD_MARK, RMII_REF_CLK_MARK, + DV_D2_MARK, SCIF1_SCK_MARK, RMII_TX_EN_MARK, + DV_D1_MARK, SCIF1_RXD_MARK, RMII_TXD0_MARK, + DV_D0_MARK, SCIF1_TXD_MARK, RMII_TXD1_MARK, + DV_D15_MARK, + DV_D14_MARK, MSIOF0_MCK_MARK, + + /*PTM*/ + DV_D13_MARK, MSIOF0_TSCK_MARK, + DV_D12_MARK, MSIOF0_RXD_MARK, + DV_D11_MARK, MSIOF0_TXD_MARK, + DV_D10_MARK, MSIOF0_TSYNC_MARK, + DV_D9_MARK, MSIOF0_SS1_MARK, MSIOF0_RSCK_MARK, + DV_D8_MARK, MSIOF0_SS2_MARK, MSIOF0_RSYNC_MARK, + LCDVCPWC_MARK, SCIF0_RXD_MARK, + LCDRD_MARK, SCIF0_SCK_MARK, + + /*PTN*/ + VIO0_D1_MARK, + VIO0_D0_MARK, + DV_CLKI_MARK, + DV_CLK_MARK, SCIF2_V_SCK_MARK, + DV_VSYNC_MARK, SCIF2_V_RXD_MARK, + DV_HSYNC_MARK, SCIF2_V_TXD_MARK, + DV_D7_MARK, SCIF3_V_CTS_MARK, RMII_RX_ER_MARK, + DV_D6_MARK, SCIF3_V_RTS_MARK, RMII_CRS_DV_MARK, + + /*PTQ*/ + D7_MARK, + D6_MARK, + D5_MARK, + D4_MARK, + D3_MARK, + D2_MARK, + D1_MARK, + D0_MARK, + + /*PTR*/ + CS6B_CE1B_MARK, + CS6A_CE2B_MARK, + CS5B_CE1A_MARK, + CS5A_CE2A_MARK, + IOIS16_MARK, LCDLCLK_MARK, + WAIT_MARK, + WE3_ICIOWR_MARK, TPUTO3_MARK, TPUTI3_MARK, + WE2_ICIORD_MARK, TPUTO2_MARK, IDEA2_MARK, + + /*PTS*/ + VIO_CKO_MARK, + VIO1_FLD_MARK, TPUTI2_MARK, IDEIORDY_MARK, + VIO1_HD_MARK, SCIF5_SCK_MARK, + VIO1_VD_MARK, SCIF5_RXD_MARK, + VIO1_CLK_MARK, SCIF5_TXD_MARK, + VIO1_D7_MARK, VIO0_D15_MARK, IDED7_MARK, + VIO1_D6_MARK, VIO0_D14_MARK, IDED6_MARK, + + /*PTT*/ + D15_MARK, + D14_MARK, + D13_MARK, + D12_MARK, + D11_MARK, + D10_MARK, + D9_MARK, + D8_MARK, + + /*PTU*/ + DMAC_DACK0_MARK, + DMAC_DREQ0_MARK, + FSIOASD_MARK, + FSIIABCK_MARK, + FSIIALRCK_MARK, + FSIOABCK_MARK, + FSIOALRCK_MARK, + CLKAUDIOAO_MARK, + + /*PTV*/ + FSIIBSD_MARK, MSIOF1_SS2_MARK, MSIOF1_RSYNC_MARK, + FSIOBSD_MARK, MSIOF1_SS1_MARK, MSIOF1_RSCK_MARK, + FSIIBBCK_MARK, MSIOF1_RXD_MARK, + FSIIBLRCK_MARK, MSIOF1_TSYNC_MARK, + FSIOBBCK_MARK, MSIOF1_TSCK_MARK, + FSIOBLRCK_MARK, MSIOF1_TXD_MARK, + CLKAUDIOBO_MARK, MSIOF1_MCK_MARK, + FSIIASD_MARK, + + /*PTW*/ + MMC_D7_MARK, SDHI1CD_MARK, IODACK_MARK, + MMC_D6_MARK, SDHI1WP_MARK, IDERST_MARK, + MMC_D5_MARK, SDHI1D3_MARK, EXBUF_ENB_MARK, + MMC_D4_MARK, SDHI1D2_MARK, DIRECTION_MARK, + MMC_D3_MARK, SDHI1D1_MARK, + MMC_D2_MARK, SDHI1D0_MARK, + MMC_D1_MARK, SDHI1CMD_MARK, + MMC_D0_MARK, SDHI1CLK_MARK, + + /*PTX*/ + DMAC_DACK1_MARK, IRDA_OUT_MARK, + DMAC_DREQ1_MARK, IRDA_IN_MARK, + TSIF_TS0_SDAT_MARK, LNKSTA_MARK, + TSIF_TS0_SCK_MARK, MDIO_MARK, + TSIF_TS0_SDEN_MARK, MDC_MARK, + TSIF_TS0_SPSYNC_MARK, + MMC_CLK_MARK, + MMC_CMD_MARK, + + /*PTY*/ + SDHI0CD_MARK, + SDHI0WP_MARK, + SDHI0D3_MARK, + SDHI0D2_MARK, + SDHI0D1_MARK, + SDHI0D0_MARK, + SDHI0CMD_MARK, + SDHI0CLK_MARK, + + /*PTZ*/ + INTC_IRQ7_MARK, SCIF3_I_CTS_MARK, + INTC_IRQ6_MARK, SCIF3_I_RTS_MARK, + INTC_IRQ5_MARK, SCIF3_I_SCK_MARK, + INTC_IRQ4_MARK, SCIF3_I_RXD_MARK, + INTC_IRQ3_MARK, SCIF3_I_TXD_MARK, + INTC_IRQ2_MARK, + INTC_IRQ1_MARK, + INTC_IRQ0_MARK, + PINMUX_MARK_END, +}; + +static pinmux_enum_t pinmux_data[] = { + /* PTA GPIO */ + PINMUX_DATA(PTA7_DATA, PTA7_IN, PTA7_OUT, PTA7_IN_PU), + PINMUX_DATA(PTA6_DATA, PTA6_IN, PTA6_OUT, PTA6_IN_PU), + PINMUX_DATA(PTA5_DATA, PTA5_IN, PTA5_OUT, PTA5_IN_PU), + PINMUX_DATA(PTA4_DATA, PTA4_IN, PTA4_OUT, PTA4_IN_PU), + PINMUX_DATA(PTA3_DATA, PTA3_IN, PTA3_OUT, PTA3_IN_PU), + PINMUX_DATA(PTA2_DATA, PTA2_IN, PTA2_OUT, PTA2_IN_PU), + PINMUX_DATA(PTA1_DATA, PTA1_IN, PTA1_OUT, PTA1_IN_PU), + PINMUX_DATA(PTA0_DATA, PTA0_IN, PTA0_OUT, PTA0_IN_PU), + + /* PTB GPIO */ + PINMUX_DATA(PTB7_DATA, PTB7_IN, PTB7_OUT, PTB7_IN_PU), + PINMUX_DATA(PTB6_DATA, PTB6_IN, PTB6_OUT, PTB6_IN_PU), + PINMUX_DATA(PTB5_DATA, PTB5_IN, PTB5_OUT, PTB5_IN_PU), + PINMUX_DATA(PTB4_DATA, PTB4_IN, PTB4_OUT, PTB4_IN_PU), + PINMUX_DATA(PTB3_DATA, PTB3_IN, PTB3_OUT, PTB3_IN_PU), + PINMUX_DATA(PTB2_DATA, PTB2_IN, PTB2_OUT, PTB2_IN_PU), + PINMUX_DATA(PTB1_DATA, PTB1_IN, PTB1_OUT, PTB1_IN_PU), + PINMUX_DATA(PTB0_DATA, PTB0_IN, PTB0_OUT, PTB0_IN_PU), + + /* PTC GPIO */ + PINMUX_DATA(PTC7_DATA, PTC7_IN, PTC7_OUT, PTC7_IN_PU), + PINMUX_DATA(PTC6_DATA, PTC6_IN, PTC6_OUT, PTC6_IN_PU), + PINMUX_DATA(PTC5_DATA, PTC5_IN, PTC5_OUT, PTC5_IN_PU), + PINMUX_DATA(PTC4_DATA, PTC4_IN, PTC4_OUT, PTC4_IN_PU), + PINMUX_DATA(PTC3_DATA, PTC3_IN, PTC3_OUT, PTC3_IN_PU), + PINMUX_DATA(PTC2_DATA, PTC2_IN, PTC2_OUT, PTC2_IN_PU), + PINMUX_DATA(PTC1_DATA, PTC1_IN, PTC1_OUT, PTC1_IN_PU), + PINMUX_DATA(PTC0_DATA, PTC0_IN, PTC0_OUT, PTC0_IN_PU), + + /* PTD GPIO */ + PINMUX_DATA(PTD7_DATA, PTD7_IN, PTD7_OUT, PTD7_IN_PU), + PINMUX_DATA(PTD6_DATA, PTD6_IN, PTD6_OUT, PTD6_IN_PU), + PINMUX_DATA(PTD5_DATA, PTD5_IN, PTD5_OUT, PTD5_IN_PU), + PINMUX_DATA(PTD4_DATA, PTD4_IN, PTD4_OUT, PTD4_IN_PU), + PINMUX_DATA(PTD3_DATA, PTD3_IN, PTD3_OUT, PTD3_IN_PU), + PINMUX_DATA(PTD2_DATA, PTD2_IN, PTD2_OUT, PTD2_IN_PU), + PINMUX_DATA(PTD1_DATA, PTD1_IN, PTD1_OUT, PTD1_IN_PU), + PINMUX_DATA(PTD0_DATA, PTD0_IN, PTD0_OUT, PTD0_IN_PU), + + /* PTE GPIO */ + PINMUX_DATA(PTE7_DATA, PTE7_IN, PTE7_OUT, PTE7_IN_PU), + PINMUX_DATA(PTE6_DATA, PTE6_IN, PTE6_OUT, PTE6_IN_PU), + PINMUX_DATA(PTE5_DATA, PTE5_IN, PTE5_OUT, PTE5_IN_PU), + PINMUX_DATA(PTE4_DATA, PTE4_IN, PTE4_OUT, PTE4_IN_PU), + PINMUX_DATA(PTE3_DATA, PTE3_IN, PTE3_OUT, PTE3_IN_PU), + PINMUX_DATA(PTE2_DATA, PTE2_IN, PTE2_OUT, PTE2_IN_PU), + PINMUX_DATA(PTE1_DATA, PTE1_IN, PTE1_OUT, PTE1_IN_PU), + PINMUX_DATA(PTE0_DATA, PTE0_IN, PTE0_OUT, PTE0_IN_PU), + + /* PTF GPIO */ + PINMUX_DATA(PTF7_DATA, PTF7_IN, PTF7_OUT, PTF7_IN_PU), + PINMUX_DATA(PTF6_DATA, PTF6_IN, PTF6_OUT, PTF6_IN_PU), + PINMUX_DATA(PTF5_DATA, PTF5_IN, PTF5_OUT, PTF5_IN_PU), + PINMUX_DATA(PTF4_DATA, PTF4_IN, PTF4_OUT, PTF4_IN_PU), + PINMUX_DATA(PTF3_DATA, PTF3_IN, PTF3_OUT, PTF3_IN_PU), + PINMUX_DATA(PTF2_DATA, PTF2_IN, PTF2_OUT, PTF2_IN_PU), + PINMUX_DATA(PTF1_DATA, PTF1_IN, PTF1_OUT, PTF1_IN_PU), + PINMUX_DATA(PTF0_DATA, PTF0_IN, PTF0_OUT, PTF0_IN_PU), + + /* PTG GPIO */ + PINMUX_DATA(PTG5_DATA, PTG5_OUT), + PINMUX_DATA(PTG4_DATA, PTG4_OUT), + PINMUX_DATA(PTG3_DATA, PTG3_OUT), + PINMUX_DATA(PTG2_DATA, PTG2_OUT), + PINMUX_DATA(PTG1_DATA, PTG1_OUT), + PINMUX_DATA(PTG0_DATA, PTG0_OUT), + + /* PTH GPIO */ + PINMUX_DATA(PTH7_DATA, PTH7_IN, PTH7_OUT, PTH7_IN_PU), + PINMUX_DATA(PTH6_DATA, PTH6_IN, PTH6_OUT, PTH6_IN_PU), + PINMUX_DATA(PTH5_DATA, PTH5_IN, PTH5_OUT, PTH5_IN_PU), + PINMUX_DATA(PTH4_DATA, PTH4_IN, PTH4_OUT, PTH4_IN_PU), + PINMUX_DATA(PTH3_DATA, PTH3_IN, PTH3_OUT, PTH3_IN_PU), + PINMUX_DATA(PTH2_DATA, PTH2_IN, PTH2_OUT, PTH2_IN_PU), + PINMUX_DATA(PTH1_DATA, PTH1_IN, PTH1_OUT, PTH1_IN_PU), + PINMUX_DATA(PTH0_DATA, PTH0_IN, PTH0_OUT, PTH0_IN_PU), + + /* PTJ GPIO */ + PINMUX_DATA(PTJ7_DATA, PTJ7_OUT), + PINMUX_DATA(PTJ6_DATA, PTJ6_OUT), + PINMUX_DATA(PTJ5_DATA, PTJ5_OUT), + PINMUX_DATA(PTJ3_DATA, PTJ3_IN, PTJ3_OUT, PTJ3_IN_PU), + PINMUX_DATA(PTJ2_DATA, PTJ2_IN, PTJ2_OUT, PTJ2_IN_PU), + PINMUX_DATA(PTJ1_DATA, PTJ1_IN, PTJ1_OUT, PTJ1_IN_PU), + PINMUX_DATA(PTJ0_DATA, PTJ0_IN, PTJ0_OUT, PTJ0_IN_PU), + + /* PTK GPIO */ + PINMUX_DATA(PTK7_DATA, PTK7_IN, PTK7_OUT, PTK7_IN_PU), + PINMUX_DATA(PTK6_DATA, PTK6_IN, PTK6_OUT, PTK6_IN_PU), + PINMUX_DATA(PTK5_DATA, PTK5_IN, PTK5_OUT, PTK5_IN_PU), + PINMUX_DATA(PTK4_DATA, PTK4_IN, PTK4_OUT, PTK4_IN_PU), + PINMUX_DATA(PTK3_DATA, PTK3_IN, PTK3_OUT, PTK3_IN_PU), + PINMUX_DATA(PTK2_DATA, PTK2_IN, PTK2_OUT, PTK2_IN_PU), + PINMUX_DATA(PTK1_DATA, PTK1_IN, PTK1_OUT, PTK1_IN_PU), + PINMUX_DATA(PTK0_DATA, PTK0_IN, PTK0_OUT, PTK0_IN_PU), + + /* PTL GPIO */ + PINMUX_DATA(PTL7_DATA, PTL7_IN, PTL7_OUT, PTL7_IN_PU), + PINMUX_DATA(PTL6_DATA, PTL6_IN, PTL6_OUT, PTL6_IN_PU), + PINMUX_DATA(PTL5_DATA, PTL5_IN, PTL5_OUT, PTL5_IN_PU), + PINMUX_DATA(PTL4_DATA, PTL4_IN, PTL4_OUT, PTL4_IN_PU), + PINMUX_DATA(PTL3_DATA, PTL3_IN, PTL3_OUT, PTL3_IN_PU), + PINMUX_DATA(PTL2_DATA, PTL2_IN, PTL2_OUT, PTL2_IN_PU), + PINMUX_DATA(PTL1_DATA, PTL1_IN, PTL1_OUT, PTL1_IN_PU), + PINMUX_DATA(PTL0_DATA, PTL0_IN, PTL0_OUT, PTL0_IN_PU), + + /* PTM GPIO */ + PINMUX_DATA(PTM7_DATA, PTM7_IN, PTM7_OUT, PTM7_IN_PU), + PINMUX_DATA(PTM6_DATA, PTM6_IN, PTM6_OUT, PTM6_IN_PU), + PINMUX_DATA(PTM5_DATA, PTM5_IN, PTM5_OUT, PTM5_IN_PU), + PINMUX_DATA(PTM4_DATA, PTM4_IN, PTM4_OUT, PTM4_IN_PU), + PINMUX_DATA(PTM3_DATA, PTM3_IN, PTM3_OUT, PTM3_IN_PU), + PINMUX_DATA(PTM2_DATA, PTM2_IN, PTM2_OUT, PTM2_IN_PU), + PINMUX_DATA(PTM1_DATA, PTM1_IN, PTM1_OUT, PTM1_IN_PU), + PINMUX_DATA(PTM0_DATA, PTM0_IN, PTM0_OUT, PTM0_IN_PU), + + /* PTN GPIO */ + PINMUX_DATA(PTN7_DATA, PTN7_IN, PTN7_OUT, PTN7_IN_PU), + PINMUX_DATA(PTN6_DATA, PTN6_IN, PTN6_OUT, PTN6_IN_PU), + PINMUX_DATA(PTN5_DATA, PTN5_IN, PTN5_OUT, PTN5_IN_PU), + PINMUX_DATA(PTN4_DATA, PTN4_IN, PTN4_OUT, PTN4_IN_PU), + PINMUX_DATA(PTN3_DATA, PTN3_IN, PTN3_OUT, PTN3_IN_PU), + PINMUX_DATA(PTN2_DATA, PTN2_IN, PTN2_OUT, PTN2_IN_PU), + PINMUX_DATA(PTN1_DATA, PTN1_IN, PTN1_OUT, PTN1_IN_PU), + PINMUX_DATA(PTN0_DATA, PTN0_IN, PTN0_OUT, PTN0_IN_PU), + + /* PTQ GPIO */ + PINMUX_DATA(PTQ7_DATA, PTQ7_IN, PTQ7_OUT, PTQ7_IN_PU), + PINMUX_DATA(PTQ6_DATA, PTQ6_IN, PTQ6_OUT, PTQ6_IN_PU), + PINMUX_DATA(PTQ5_DATA, PTQ5_IN, PTQ5_OUT, PTQ5_IN_PU), + PINMUX_DATA(PTQ4_DATA, PTQ4_IN, PTQ4_OUT, PTQ4_IN_PU), + PINMUX_DATA(PTQ3_DATA, PTQ3_IN, PTQ3_OUT, PTQ3_IN_PU), + PINMUX_DATA(PTQ2_DATA, PTQ2_IN, PTQ2_OUT, PTQ2_IN_PU), + PINMUX_DATA(PTQ1_DATA, PTQ1_IN, PTQ1_OUT, PTQ1_IN_PU), + PINMUX_DATA(PTQ0_DATA, PTQ0_IN, PTQ0_OUT, PTQ0_IN_PU), + + /* PTR GPIO */ + PINMUX_DATA(PTR7_DATA, PTR7_IN, PTR7_OUT, PTR7_IN_PU), + PINMUX_DATA(PTR6_DATA, PTR6_IN, PTR6_OUT, PTR6_IN_PU), + PINMUX_DATA(PTR5_DATA, PTR5_IN, PTR5_OUT, PTR5_IN_PU), + PINMUX_DATA(PTR4_DATA, PTR4_IN, PTR4_OUT, PTR4_IN_PU), + PINMUX_DATA(PTR3_DATA, PTR3_IN, PTR3_IN_PU), + PINMUX_DATA(PTR2_DATA, PTR2_IN, PTR2_IN_PU), + PINMUX_DATA(PTR1_DATA, PTR1_IN, PTR1_OUT, PTR1_IN_PU), + PINMUX_DATA(PTR0_DATA, PTR0_IN, PTR0_OUT, PTR0_IN_PU), + + /* PTS GPIO */ + PINMUX_DATA(PTS6_DATA, PTS6_IN, PTS6_OUT, PTS6_IN_PU), + PINMUX_DATA(PTS5_DATA, PTS5_IN, PTS5_OUT, PTS5_IN_PU), + PINMUX_DATA(PTS4_DATA, PTS4_IN, PTS4_OUT, PTS4_IN_PU), + PINMUX_DATA(PTS3_DATA, PTS3_IN, PTS3_OUT, PTS3_IN_PU), + PINMUX_DATA(PTS2_DATA, PTS2_IN, PTS2_OUT, PTS2_IN_PU), + PINMUX_DATA(PTS1_DATA, PTS1_IN, PTS1_OUT, PTS1_IN_PU), + PINMUX_DATA(PTS0_DATA, PTS0_IN, PTS0_OUT, PTS0_IN_PU), + + /* PTT GPIO */ + PINMUX_DATA(PTT7_DATA, PTT7_IN, PTT7_OUT, PTT7_IN_PU), + PINMUX_DATA(PTT6_DATA, PTT6_IN, PTT6_OUT, PTT6_IN_PU), + PINMUX_DATA(PTT5_DATA, PTT5_IN, PTT5_OUT, PTT5_IN_PU), + PINMUX_DATA(PTT4_DATA, PTT4_IN, PTT4_OUT, PTT4_IN_PU), + PINMUX_DATA(PTT3_DATA, PTT3_IN, PTT3_OUT, PTT3_IN_PU), + PINMUX_DATA(PTT2_DATA, PTT2_IN, PTT2_OUT, PTT2_IN_PU), + PINMUX_DATA(PTT1_DATA, PTT1_IN, PTT1_OUT, PTT1_IN_PU), + PINMUX_DATA(PTT0_DATA, PTT0_IN, PTT0_OUT, PTT0_IN_PU), + + /* PTU GPIO */ + PINMUX_DATA(PTU7_DATA, PTU7_IN, PTU7_OUT, PTU7_IN_PU), + PINMUX_DATA(PTU6_DATA, PTU6_IN, PTU6_OUT, PTU6_IN_PU), + PINMUX_DATA(PTU5_DATA, PTU5_IN, PTU5_OUT, PTU5_IN_PU), + PINMUX_DATA(PTU4_DATA, PTU4_IN, PTU4_OUT, PTU4_IN_PU), + PINMUX_DATA(PTU3_DATA, PTU3_IN, PTU3_OUT, PTU3_IN_PU), + PINMUX_DATA(PTU2_DATA, PTU2_IN, PTU2_OUT, PTU2_IN_PU), + PINMUX_DATA(PTU1_DATA, PTU1_IN, PTU1_OUT, PTU1_IN_PU), + PINMUX_DATA(PTU0_DATA, PTU0_IN, PTU0_OUT, PTU0_IN_PU), + + /* PTV GPIO */ + PINMUX_DATA(PTV7_DATA, PTV7_IN, PTV7_OUT, PTV7_IN_PU), + PINMUX_DATA(PTV6_DATA, PTV6_IN, PTV6_OUT, PTV6_IN_PU), + PINMUX_DATA(PTV5_DATA, PTV5_IN, PTV5_OUT, PTV5_IN_PU), + PINMUX_DATA(PTV4_DATA, PTV4_IN, PTV4_OUT, PTV4_IN_PU), + PINMUX_DATA(PTV3_DATA, PTV3_IN, PTV3_OUT, PTV3_IN_PU), + PINMUX_DATA(PTV2_DATA, PTV2_IN, PTV2_OUT, PTV2_IN_PU), + PINMUX_DATA(PTV1_DATA, PTV1_IN, PTV1_OUT, PTV1_IN_PU), + PINMUX_DATA(PTV0_DATA, PTV0_IN, PTV0_OUT, PTV0_IN_PU), + + /* PTW GPIO */ + PINMUX_DATA(PTW7_DATA, PTW7_IN, PTW7_OUT, PTW7_IN_PU), + PINMUX_DATA(PTW6_DATA, PTW6_IN, PTW6_OUT, PTW6_IN_PU), + PINMUX_DATA(PTW5_DATA, PTW5_IN, PTW5_OUT, PTW5_IN_PU), + PINMUX_DATA(PTW4_DATA, PTW4_IN, PTW4_OUT, PTW4_IN_PU), + PINMUX_DATA(PTW3_DATA, PTW3_IN, PTW3_OUT, PTW3_IN_PU), + PINMUX_DATA(PTW2_DATA, PTW2_IN, PTW2_OUT, PTW2_IN_PU), + PINMUX_DATA(PTW1_DATA, PTW1_IN, PTW1_OUT, PTW1_IN_PU), + PINMUX_DATA(PTW0_DATA, PTW0_IN, PTW0_OUT, PTW0_IN_PU), + + /* PTX GPIO */ + PINMUX_DATA(PTX7_DATA, PTX7_IN, PTX7_OUT, PTX7_IN_PU), + PINMUX_DATA(PTX6_DATA, PTX6_IN, PTX6_OUT, PTX6_IN_PU), + PINMUX_DATA(PTX5_DATA, PTX5_IN, PTX5_OUT, PTX5_IN_PU), + PINMUX_DATA(PTX4_DATA, PTX4_IN, PTX4_OUT, PTX4_IN_PU), + PINMUX_DATA(PTX3_DATA, PTX3_IN, PTX3_OUT, PTX3_IN_PU), + PINMUX_DATA(PTX2_DATA, PTX2_IN, PTX2_OUT, PTX2_IN_PU), + PINMUX_DATA(PTX1_DATA, PTX1_IN, PTX1_OUT, PTX1_IN_PU), + PINMUX_DATA(PTX0_DATA, PTX0_IN, PTX0_OUT, PTX0_IN_PU), + + /* PTY GPIO */ + PINMUX_DATA(PTY7_DATA, PTY7_IN, PTY7_OUT, PTY7_IN_PU), + PINMUX_DATA(PTY6_DATA, PTY6_IN, PTY6_OUT, PTY6_IN_PU), + PINMUX_DATA(PTY5_DATA, PTY5_IN, PTY5_OUT, PTY5_IN_PU), + PINMUX_DATA(PTY4_DATA, PTY4_IN, PTY4_OUT, PTY4_IN_PU), + PINMUX_DATA(PTY3_DATA, PTY3_IN, PTY3_OUT, PTY3_IN_PU), + PINMUX_DATA(PTY2_DATA, PTY2_IN, PTY2_OUT, PTY2_IN_PU), + PINMUX_DATA(PTY1_DATA, PTY1_IN, PTY1_OUT, PTY1_IN_PU), + PINMUX_DATA(PTY0_DATA, PTY0_IN, PTY0_OUT, PTY0_IN_PU), + + /* PTZ GPIO */ + PINMUX_DATA(PTZ7_DATA, PTZ7_IN, PTZ7_OUT, PTZ7_IN_PU), + PINMUX_DATA(PTZ6_DATA, PTZ6_IN, PTZ6_OUT, PTZ6_IN_PU), + PINMUX_DATA(PTZ5_DATA, PTZ5_IN, PTZ5_OUT, PTZ5_IN_PU), + PINMUX_DATA(PTZ4_DATA, PTZ4_IN, PTZ4_OUT, PTZ4_IN_PU), + PINMUX_DATA(PTZ3_DATA, PTZ3_IN, PTZ3_OUT, PTZ3_IN_PU), + PINMUX_DATA(PTZ2_DATA, PTZ2_IN, PTZ2_OUT, PTZ2_IN_PU), + PINMUX_DATA(PTZ1_DATA, PTZ1_IN, PTZ1_OUT, PTZ1_IN_PU), + PINMUX_DATA(PTZ0_DATA, PTZ0_IN, PTZ0_OUT, PTZ0_IN_PU), + + /* PTA FN */ + PINMUX_DATA(D23_MARK, PSA15_0, PSA14_0, PTA7_FN), + PINMUX_DATA(D22_MARK, PSA15_0, PSA14_0, PTA6_FN), + PINMUX_DATA(D21_MARK, PSA15_0, PSA14_0, PTA5_FN), + PINMUX_DATA(D20_MARK, PSA15_0, PSA14_0, PTA4_FN), + PINMUX_DATA(D19_MARK, PSA15_0, PSA14_0, PTA3_FN), + PINMUX_DATA(D18_MARK, PSA15_0, PSA14_0, PTA2_FN), + PINMUX_DATA(D17_MARK, PSA15_0, PSA14_0, PTA1_FN), + PINMUX_DATA(D16_MARK, PSA15_0, PSA14_0, PTA0_FN), + + PINMUX_DATA(KEYOUT2_MARK, PSA15_0, PSA14_1, PTA7_FN), + PINMUX_DATA(KEYOUT1_MARK, PSA15_0, PSA14_1, PTA6_FN), + PINMUX_DATA(KEYOUT0_MARK, PSA15_0, PSA14_1, PTA5_FN), + PINMUX_DATA(KEYIN4_MARK, PSA15_0, PSA14_1, PTA4_FN), + PINMUX_DATA(KEYIN3_MARK, PSA15_0, PSA14_1, PTA3_FN), + PINMUX_DATA(KEYIN2_MARK, PSA15_0, PSA14_1, PTA2_FN), + PINMUX_DATA(KEYIN1_MARK, PSA15_0, PSA14_1, PTA1_FN), + PINMUX_DATA(KEYIN0_MARK, PSA15_0, PSA14_1, PTA0_FN), + + PINMUX_DATA(IDED15_MARK, PSA15_1, PSA14_0, PTA7_FN), + PINMUX_DATA(IDED14_MARK, PSA15_1, PSA14_0, PTA6_FN), + PINMUX_DATA(IDED13_MARK, PSA15_1, PSA14_0, PTA5_FN), + PINMUX_DATA(IDED12_MARK, PSA15_1, PSA14_0, PTA4_FN), + PINMUX_DATA(IDED11_MARK, PSA15_1, PSA14_0, PTA3_FN), + PINMUX_DATA(IDED10_MARK, PSA15_1, PSA14_0, PTA2_FN), + PINMUX_DATA(IDED9_MARK, PSA15_1, PSA14_0, PTA1_FN), + PINMUX_DATA(IDED8_MARK, PSA15_1, PSA14_0, PTA0_FN), + + /* PTB FN */ + PINMUX_DATA(D31_MARK, PSE15_0, PSE14_0, PTB7_FN), + PINMUX_DATA(D30_MARK, PSE15_0, PSE14_0, PTB6_FN), + PINMUX_DATA(D29_MARK, PSE11_0, PTB5_FN), + PINMUX_DATA(D28_MARK, PSE11_0, PTB4_FN), + PINMUX_DATA(D27_MARK, PSE11_0, PTB3_FN), + PINMUX_DATA(D26_MARK, PSA15_0, PSA14_0, PTB2_FN), + PINMUX_DATA(D25_MARK, PSA15_0, PSA14_0, PTB1_FN), + PINMUX_DATA(D24_MARK, PSA15_0, PSA14_0, PTB0_FN), + + PINMUX_DATA(IDEA1_MARK, PSE15_1, PSE14_0, PTB7_FN), + PINMUX_DATA(IDEA0_MARK, PSE15_1, PSE14_0, PTB6_FN), + PINMUX_DATA(IODREQ_MARK, PSE11_1, PTB5_FN), + PINMUX_DATA(IDECS0_MARK, PSE11_1, PTB4_FN), + PINMUX_DATA(IDECS1_MARK, PSE11_1, PTB3_FN), + PINMUX_DATA(IDEIORD_MARK, PSA15_1, PSA14_0, PTB2_FN), + PINMUX_DATA(IDEIOWR_MARK, PSA15_1, PSA14_0, PTB1_FN), + PINMUX_DATA(IDEINT_MARK, PSA15_1, PSA14_0, PTB0_FN), + + PINMUX_DATA(TPUTO1_MARK, PSE15_0, PSE14_1, PTB7_FN), + PINMUX_DATA(TPUTO0_MARK, PSE15_0, PSE14_1, PTB6_FN), + + PINMUX_DATA(KEYOUT5_IN5_MARK, PSA15_0, PSA14_1, PTB2_FN), + PINMUX_DATA(KEYOUT4_IN6_MARK, PSA15_0, PSA14_1, PTB1_FN), + PINMUX_DATA(KEYOUT3_MARK, PSA15_0, PSA14_1, PTB0_FN), + + /* PTC FN */ + PINMUX_DATA(LCDD7_MARK, PSD5_0, PTC7_FN), + PINMUX_DATA(LCDD6_MARK, PSD5_0, PTC6_FN), + PINMUX_DATA(LCDD5_MARK, PSD5_0, PTC5_FN), + PINMUX_DATA(LCDD4_MARK, PSD5_0, PTC4_FN), + PINMUX_DATA(LCDD3_MARK, PSD5_0, PTC3_FN), + PINMUX_DATA(LCDD2_MARK, PSD5_0, PTC2_FN), + PINMUX_DATA(LCDD1_MARK, PSD5_0, PTC1_FN), + PINMUX_DATA(LCDD0_MARK, PSD5_0, PTC0_FN), + + /* PTD FN */ + PINMUX_DATA(LCDD15_MARK, PSD5_0, PTD7_FN), + PINMUX_DATA(LCDD14_MARK, PSD5_0, PTD6_FN), + PINMUX_DATA(LCDD13_MARK, PSD5_0, PTD5_FN), + PINMUX_DATA(LCDD12_MARK, PSD5_0, PTD4_FN), + PINMUX_DATA(LCDD11_MARK, PSD5_0, PTD3_FN), + PINMUX_DATA(LCDD10_MARK, PSD5_0, PTD2_FN), + PINMUX_DATA(LCDD9_MARK, PSD5_0, PTD1_FN), + PINMUX_DATA(LCDD8_MARK, PSD5_0, PTD0_FN), + + /* PTE FN */ + PINMUX_DATA(FSIMCKB_MARK, PTE7_FN), + PINMUX_DATA(FSIMCKA_MARK, PTE6_FN), + + PINMUX_DATA(LCDD21_MARK, PSC5_0, PSC4_0, PTE5_FN), + PINMUX_DATA(LCDD20_MARK, PSD3_0, PSD2_0, PTE4_FN), + PINMUX_DATA(LCDD19_MARK, PSA3_0, PSA2_0, PTE3_FN), + PINMUX_DATA(LCDD18_MARK, PSA3_0, PSA2_0, PTE2_FN), + PINMUX_DATA(LCDD17_MARK, PSD5_0, PTE1_FN), + PINMUX_DATA(LCDD16_MARK, PSD5_0, PTE0_FN), + + PINMUX_DATA(SCIF2_L_TXD_MARK, PSC5_0, PSC4_1, PTE5_FN), + PINMUX_DATA(SCIF4_SCK_MARK, PSD3_0, PSD2_1, PTE4_FN), + PINMUX_DATA(SCIF4_RXD_MARK, PSA3_0, PSA2_1, PTE3_FN), + PINMUX_DATA(SCIF4_TXD_MARK, PSA3_0, PSA2_1, PTE2_FN), + + /* PTF FN */ + PINMUX_DATA(LCDVSYN_MARK, PSD8_0, PTF7_FN), + PINMUX_DATA(LCDDISP_MARK, PSD10_0, PSD9_0, PTF6_FN), + PINMUX_DATA(LCDHSYN_MARK, PSD10_0, PSD9_0, PTF5_FN), + PINMUX_DATA(LCDDON_MARK, PSD8_0, PTF4_FN), + PINMUX_DATA(LCDDCK_MARK, PSD10_0, PSD9_0, PTF3_FN), + PINMUX_DATA(LCDVEPWC_MARK, PSA6_0, PTF2_FN), + PINMUX_DATA(LCDD23_MARK, PSC7_0, PSC6_0, PTF1_FN), + PINMUX_DATA(LCDD22_MARK, PSC5_0, PSC4_0, PTF0_FN), + + PINMUX_DATA(LCDRS_MARK, PSD10_0, PSD9_1, PTF6_FN), + PINMUX_DATA(LCDCS_MARK, PSD10_0, PSD9_1, PTF5_FN), + PINMUX_DATA(LCDWR_MARK, PSD10_0, PSD9_1, PTF3_FN), + + PINMUX_DATA(SCIF0_TXD_MARK, PSA6_1, PTF2_FN), + PINMUX_DATA(SCIF2_L_SCK_MARK, PSC7_0, PSC6_1, PTF1_FN), + PINMUX_DATA(SCIF2_L_RXD_MARK, PSC5_0, PSC4_1, PTF0_FN), + + /* PTG FN */ + PINMUX_DATA(AUDCK_MARK, PTG5_FN), + PINMUX_DATA(AUDSYNC_MARK, PTG4_FN), + PINMUX_DATA(AUDATA3_MARK, PTG3_FN), + PINMUX_DATA(AUDATA2_MARK, PTG2_FN), + PINMUX_DATA(AUDATA1_MARK, PTG1_FN), + PINMUX_DATA(AUDATA0_MARK, PTG0_FN), + + /* PTH FN */ + PINMUX_DATA(VIO0_VD_MARK, PTH7_FN), + PINMUX_DATA(VIO0_CLK_MARK, PTH6_FN), + PINMUX_DATA(VIO0_D7_MARK, PTH5_FN), + PINMUX_DATA(VIO0_D6_MARK, PTH4_FN), + PINMUX_DATA(VIO0_D5_MARK, PTH3_FN), + PINMUX_DATA(VIO0_D4_MARK, PTH2_FN), + PINMUX_DATA(VIO0_D3_MARK, PTH1_FN), + PINMUX_DATA(VIO0_D2_MARK, PTH0_FN), + + /* PTJ FN */ + PINMUX_DATA(PDSTATUS_MARK, PTJ7_FN), + PINMUX_DATA(STATUS2_MARK, PTJ6_FN), + PINMUX_DATA(STATUS0_MARK, PTJ5_FN), + PINMUX_DATA(A25_MARK, PSA8_0, PTJ3_FN), + PINMUX_DATA(BS_MARK, PSA8_1, PTJ3_FN), + PINMUX_DATA(A24_MARK, PTJ2_FN), + PINMUX_DATA(A23_MARK, PTJ1_FN), + PINMUX_DATA(A22_MARK, PTJ0_FN), + + /* PTK FN */ + PINMUX_DATA(VIO1_D5_MARK, PSB7_0, PSB6_0, PTK7_FN), + PINMUX_DATA(VIO1_D4_MARK, PSB7_0, PSB6_0, PTK6_FN), + PINMUX_DATA(VIO1_D3_MARK, PSB7_0, PSB6_0, PTK5_FN), + PINMUX_DATA(VIO1_D2_MARK, PSB7_0, PSB6_0, PTK4_FN), + PINMUX_DATA(VIO1_D1_MARK, PSB7_0, PSB6_0, PTK3_FN), + PINMUX_DATA(VIO1_D0_MARK, PSB7_0, PSB6_0, PTK2_FN), + + PINMUX_DATA(VIO0_D13_MARK, PSB7_0, PSB6_1, PTK7_FN), + PINMUX_DATA(VIO0_D12_MARK, PSB7_0, PSB6_1, PTK6_FN), + PINMUX_DATA(VIO0_D11_MARK, PSB7_0, PSB6_1, PTK5_FN), + PINMUX_DATA(VIO0_D10_MARK, PSB7_0, PSB6_1, PTK4_FN), + PINMUX_DATA(VIO0_D9_MARK, PSB7_0, PSB6_1, PTK3_FN), + PINMUX_DATA(VIO0_D8_MARK, PSB7_0, PSB6_1, PTK2_FN), + + PINMUX_DATA(IDED5_MARK, PSB7_1, PSB6_0, PTK7_FN), + PINMUX_DATA(IDED4_MARK, PSB7_1, PSB6_0, PTK6_FN), + PINMUX_DATA(IDED3_MARK, PSB7_1, PSB6_0, PTK5_FN), + PINMUX_DATA(IDED2_MARK, PSB7_1, PSB6_0, PTK4_FN), + PINMUX_DATA(IDED1_MARK, PSB7_1, PSB6_0, PTK3_FN), + PINMUX_DATA(IDED0_MARK, PSB7_1, PSB6_0, PTK2_FN), + + PINMUX_DATA(VIO0_FLD_MARK, PTK1_FN), + PINMUX_DATA(VIO0_HD_MARK, PTK0_FN), + + /* PTL FN */ + PINMUX_DATA(DV_D5_MARK, PSB9_0, PSB8_0, PTL7_FN), + PINMUX_DATA(DV_D4_MARK, PSB9_0, PSB8_0, PTL6_FN), + PINMUX_DATA(DV_D3_MARK, PSE7_0, PSE6_0, PTL5_FN), + PINMUX_DATA(DV_D2_MARK, PSC9_0, PSC8_0, PTL4_FN), + PINMUX_DATA(DV_D1_MARK, PSC9_0, PSC8_0, PTL3_FN), + PINMUX_DATA(DV_D0_MARK, PSC9_0, PSC8_0, PTL2_FN), + PINMUX_DATA(DV_D15_MARK, PSD4_0, PTL1_FN), + PINMUX_DATA(DV_D14_MARK, PSE5_0, PSE4_0, PTL0_FN), + + PINMUX_DATA(SCIF3_V_SCK_MARK, PSB9_0, PSB8_1, PTL7_FN), + PINMUX_DATA(SCIF3_V_RXD_MARK, PSB9_0, PSB8_1, PTL6_FN), + PINMUX_DATA(SCIF3_V_TXD_MARK, PSE7_0, PSE6_1, PTL5_FN), + PINMUX_DATA(SCIF1_SCK_MARK, PSC9_0, PSC8_1, PTL4_FN), + PINMUX_DATA(SCIF1_RXD_MARK, PSC9_0, PSC8_1, PTL3_FN), + PINMUX_DATA(SCIF1_TXD_MARK, PSC9_0, PSC8_1, PTL2_FN), + + PINMUX_DATA(RMII_RXD0_MARK, PSB9_1, PSB8_0, PTL7_FN), + PINMUX_DATA(RMII_RXD1_MARK, PSB9_1, PSB8_0, PTL6_FN), + PINMUX_DATA(RMII_REF_CLK_MARK, PSE7_1, PSE6_0, PTL5_FN), + PINMUX_DATA(RMII_TX_EN_MARK, PSC9_1, PSC8_0, PTL4_FN), + PINMUX_DATA(RMII_TXD0_MARK, PSC9_1, PSC8_0, PTL3_FN), + PINMUX_DATA(RMII_TXD1_MARK, PSC9_1, PSC8_0, PTL2_FN), + + PINMUX_DATA(MSIOF0_MCK_MARK, PSE5_0, PSE4_1, PTL0_FN), + + /* PTM FN */ + PINMUX_DATA(DV_D13_MARK, PSC13_0, PSC12_0, PTM7_FN), + PINMUX_DATA(DV_D12_MARK, PSC13_0, PSC12_0, PTM6_FN), + PINMUX_DATA(DV_D11_MARK, PSC13_0, PSC12_0, PTM5_FN), + PINMUX_DATA(DV_D10_MARK, PSC13_0, PSC12_0, PTM4_FN), + PINMUX_DATA(DV_D9_MARK, PSC11_0, PSC10_0, PTM3_FN), + PINMUX_DATA(DV_D8_MARK, PSC11_0, PSC10_0, PTM2_FN), + + PINMUX_DATA(MSIOF0_TSCK_MARK, PSC13_0, PSC12_1, PTM7_FN), + PINMUX_DATA(MSIOF0_RXD_MARK, PSC13_0, PSC12_1, PTM6_FN), + PINMUX_DATA(MSIOF0_TXD_MARK, PSC13_0, PSC12_1, PTM5_FN), + PINMUX_DATA(MSIOF0_TSYNC_MARK, PSC13_0, PSC12_1, PTM4_FN), + PINMUX_DATA(MSIOF0_SS1_MARK, PSC11_0, PSC10_1, PTM3_FN), + PINMUX_DATA(MSIOF0_RSCK_MARK, PSC11_1, PSC10_0, PTM3_FN), + PINMUX_DATA(MSIOF0_SS2_MARK, PSC11_0, PSC10_1, PTM2_FN), + PINMUX_DATA(MSIOF0_RSYNC_MARK, PSC11_1, PSC10_0, PTM2_FN), + + PINMUX_DATA(LCDVCPWC_MARK, PSA6_0, PTM1_FN), + PINMUX_DATA(LCDRD_MARK, PSA7_0, PTM0_FN), + + PINMUX_DATA(SCIF0_RXD_MARK, PSA6_1, PTM1_FN), + PINMUX_DATA(SCIF0_SCK_MARK, PSA7_1, PTM0_FN), + + /* PTN FN */ + PINMUX_DATA(VIO0_D1_MARK, PTN7_FN), + PINMUX_DATA(VIO0_D0_MARK, PTN6_FN), + + PINMUX_DATA(DV_CLKI_MARK, PSD11_0, PTN5_FN), + PINMUX_DATA(DV_CLK_MARK, PSD13_0, PSD12_0, PTN4_FN), + PINMUX_DATA(DV_VSYNC_MARK, PSD15_0, PSD14_0, PTN3_FN), + PINMUX_DATA(DV_HSYNC_MARK, PSB5_0, PSB4_0, PTN2_FN), + PINMUX_DATA(DV_D7_MARK, PSB3_0, PSB2_0, PTN1_FN), + PINMUX_DATA(DV_D6_MARK, PSB1_0, PSB0_0, PTN0_FN), + + PINMUX_DATA(SCIF2_V_SCK_MARK, PSD13_0, PSD12_1, PTN4_FN), + PINMUX_DATA(SCIF2_V_RXD_MARK, PSD15_0, PSD14_1, PTN3_FN), + PINMUX_DATA(SCIF2_V_TXD_MARK, PSB5_0, PSB4_1, PTN2_FN), + PINMUX_DATA(SCIF3_V_CTS_MARK, PSB3_0, PSB2_1, PTN1_FN), + PINMUX_DATA(SCIF3_V_RTS_MARK, PSB1_0, PSB0_1, PTN0_FN), + + PINMUX_DATA(RMII_RX_ER_MARK, PSB3_1, PSB2_0, PTN1_FN), + PINMUX_DATA(RMII_CRS_DV_MARK, PSB1_1, PSB0_0, PTN0_FN), + + /* PTQ FN */ + PINMUX_DATA(D7_MARK, PTQ7_FN), + PINMUX_DATA(D6_MARK, PTQ6_FN), + PINMUX_DATA(D5_MARK, PTQ5_FN), + PINMUX_DATA(D4_MARK, PTQ4_FN), + PINMUX_DATA(D3_MARK, PTQ3_FN), + PINMUX_DATA(D2_MARK, PTQ2_FN), + PINMUX_DATA(D1_MARK, PTQ1_FN), + PINMUX_DATA(D0_MARK, PTQ0_FN), + + /* PTR FN */ + PINMUX_DATA(CS6B_CE1B_MARK, PTR7_FN), + PINMUX_DATA(CS6A_CE2B_MARK, PTR6_FN), + PINMUX_DATA(CS5B_CE1A_MARK, PTR5_FN), + PINMUX_DATA(CS5A_CE2A_MARK, PTR4_FN), + PINMUX_DATA(IOIS16_MARK, PSA5_0, PTR3_FN), + PINMUX_DATA(WAIT_MARK, PTR2_FN), + PINMUX_DATA(WE3_ICIOWR_MARK, PSA1_0, PSA0_0, PTR1_FN), + PINMUX_DATA(WE2_ICIORD_MARK, PSD1_0, PSD0_0, PTR0_FN), + + PINMUX_DATA(LCDLCLK_MARK, PSA5_1, PTR3_FN), + + PINMUX_DATA(IDEA2_MARK, PSD1_1, PSD0_0, PTR0_FN), + + PINMUX_DATA(TPUTO3_MARK, PSA1_0, PSA0_1, PTR1_FN), + PINMUX_DATA(TPUTI3_MARK, PSA1_1, PSA0_0, PTR1_FN), + PINMUX_DATA(TPUTO2_MARK, PSD1_0, PSD0_1, PTR0_FN), + + /* PTS FN */ + PINMUX_DATA(VIO_CKO_MARK, PTS6_FN), + + PINMUX_DATA(TPUTI2_MARK, PSE9_0, PSE8_1, PTS5_FN), + + PINMUX_DATA(IDEIORDY_MARK, PSE9_1, PSE8_0, PTS5_FN), + + PINMUX_DATA(VIO1_FLD_MARK, PSE9_0, PSE8_0, PTS5_FN), + PINMUX_DATA(VIO1_HD_MARK, PSA10_0, PTS4_FN), + PINMUX_DATA(VIO1_VD_MARK, PSA9_0, PTS3_FN), + PINMUX_DATA(VIO1_CLK_MARK, PSA9_0, PTS2_FN), + PINMUX_DATA(VIO1_D7_MARK, PSB7_0, PSB6_0, PTS1_FN), + PINMUX_DATA(VIO1_D6_MARK, PSB7_0, PSB6_0, PTS0_FN), + + PINMUX_DATA(SCIF5_SCK_MARK, PSA10_1, PTS4_FN), + PINMUX_DATA(SCIF5_RXD_MARK, PSA9_1, PTS3_FN), + PINMUX_DATA(SCIF5_TXD_MARK, PSA9_1, PTS2_FN), + + PINMUX_DATA(VIO0_D15_MARK, PSB7_0, PSB6_1, PTS1_FN), + PINMUX_DATA(VIO0_D14_MARK, PSB7_0, PSB6_1, PTS0_FN), + + PINMUX_DATA(IDED7_MARK, PSB7_1, PSB6_0, PTS1_FN), + PINMUX_DATA(IDED6_MARK, PSB7_1, PSB6_0, PTS0_FN), + + /* PTT FN */ + PINMUX_DATA(D15_MARK, PTT7_FN), + PINMUX_DATA(D14_MARK, PTT6_FN), + PINMUX_DATA(D13_MARK, PTT5_FN), + PINMUX_DATA(D12_MARK, PTT4_FN), + PINMUX_DATA(D11_MARK, PTT3_FN), + PINMUX_DATA(D10_MARK, PTT2_FN), + PINMUX_DATA(D9_MARK, PTT1_FN), + PINMUX_DATA(D8_MARK, PTT0_FN), + + /* PTU FN */ + PINMUX_DATA(DMAC_DACK0_MARK, PTU7_FN), + PINMUX_DATA(DMAC_DREQ0_MARK, PTU6_FN), + + PINMUX_DATA(FSIOASD_MARK, PSE1_0, PTU5_FN), + PINMUX_DATA(FSIIABCK_MARK, PSE1_0, PTU4_FN), + PINMUX_DATA(FSIIALRCK_MARK, PSE1_0, PTU3_FN), + PINMUX_DATA(FSIOABCK_MARK, PSE1_0, PTU2_FN), + PINMUX_DATA(FSIOALRCK_MARK, PSE1_0, PTU1_FN), + PINMUX_DATA(CLKAUDIOAO_MARK, PSE0_0, PTU0_FN), + + /* PTV FN */ + PINMUX_DATA(FSIIBSD_MARK, PSD7_0, PSD6_0, PTV7_FN), + PINMUX_DATA(FSIOBSD_MARK, PSD7_0, PSD6_0, PTV6_FN), + PINMUX_DATA(FSIIBBCK_MARK, PSC15_0, PSC14_0, PTV5_FN), + PINMUX_DATA(FSIIBLRCK_MARK, PSC15_0, PSC14_0, PTV4_FN), + PINMUX_DATA(FSIOBBCK_MARK, PSC15_0, PSC14_0, PTV3_FN), + PINMUX_DATA(FSIOBLRCK_MARK, PSC15_0, PSC14_0, PTV2_FN), + PINMUX_DATA(CLKAUDIOBO_MARK, PSE3_0, PSE2_0, PTV1_FN), + PINMUX_DATA(FSIIASD_MARK, PSE10_0, PTV0_FN), + + PINMUX_DATA(MSIOF1_SS2_MARK, PSD7_0, PSD6_1, PTV7_FN), + PINMUX_DATA(MSIOF1_RSYNC_MARK, PSD7_1, PSD6_0, PTV7_FN), + PINMUX_DATA(MSIOF1_SS1_MARK, PSD7_0, PSD6_1, PTV6_FN), + PINMUX_DATA(MSIOF1_RSCK_MARK, PSD7_1, PSD6_0, PTV6_FN), + PINMUX_DATA(MSIOF1_RXD_MARK, PSC15_0, PSC14_1, PTV5_FN), + PINMUX_DATA(MSIOF1_TSYNC_MARK, PSC15_0, PSC14_1, PTV4_FN), + PINMUX_DATA(MSIOF1_TSCK_MARK, PSC15_0, PSC14_1, PTV3_FN), + PINMUX_DATA(MSIOF1_TXD_MARK, PSC15_0, PSC14_1, PTV2_FN), + PINMUX_DATA(MSIOF1_MCK_MARK, PSE3_0, PSE2_1, PTV1_FN), + + /* PTW FN */ + PINMUX_DATA(MMC_D7_MARK, PSE13_0, PSE12_0, PTW7_FN), + PINMUX_DATA(MMC_D6_MARK, PSE13_0, PSE12_0, PTW6_FN), + PINMUX_DATA(MMC_D5_MARK, PSE13_0, PSE12_0, PTW5_FN), + PINMUX_DATA(MMC_D4_MARK, PSE13_0, PSE12_0, PTW4_FN), + PINMUX_DATA(MMC_D3_MARK, PSA13_0, PTW3_FN), + PINMUX_DATA(MMC_D2_MARK, PSA13_0, PTW2_FN), + PINMUX_DATA(MMC_D1_MARK, PSA13_0, PTW1_FN), + PINMUX_DATA(MMC_D0_MARK, PSA13_0, PTW0_FN), + + PINMUX_DATA(SDHI1CD_MARK, PSE13_0, PSE12_1, PTW7_FN), + PINMUX_DATA(SDHI1WP_MARK, PSE13_0, PSE12_1, PTW6_FN), + PINMUX_DATA(SDHI1D3_MARK, PSE13_0, PSE12_1, PTW5_FN), + PINMUX_DATA(SDHI1D2_MARK, PSE13_0, PSE12_1, PTW4_FN), + PINMUX_DATA(SDHI1D1_MARK, PSA13_1, PTW3_FN), + PINMUX_DATA(SDHI1D0_MARK, PSA13_1, PTW2_FN), + PINMUX_DATA(SDHI1CMD_MARK, PSA13_1, PTW1_FN), + PINMUX_DATA(SDHI1CLK_MARK, PSA13_1, PTW0_FN), + + PINMUX_DATA(IODACK_MARK, PSE13_1, PSE12_0, PTW7_FN), + PINMUX_DATA(IDERST_MARK, PSE13_1, PSE12_0, PTW6_FN), + PINMUX_DATA(EXBUF_ENB_MARK, PSE13_1, PSE12_0, PTW5_FN), + PINMUX_DATA(DIRECTION_MARK, PSE13_1, PSE12_0, PTW4_FN), + + /* PTX FN */ + PINMUX_DATA(DMAC_DACK1_MARK, PSA12_0, PTX7_FN), + PINMUX_DATA(DMAC_DREQ1_MARK, PSA12_0, PTX6_FN), + + PINMUX_DATA(IRDA_OUT_MARK, PSA12_1, PTX7_FN), + PINMUX_DATA(IRDA_IN_MARK, PSA12_1, PTX6_FN), + + PINMUX_DATA(TSIF_TS0_SDAT_MARK, PSC0_0, PTX5_FN), + PINMUX_DATA(TSIF_TS0_SCK_MARK, PSC1_0, PTX4_FN), + PINMUX_DATA(TSIF_TS0_SDEN_MARK, PSC2_0, PTX3_FN), + PINMUX_DATA(TSIF_TS0_SPSYNC_MARK, PTX2_FN), + + PINMUX_DATA(LNKSTA_MARK, PSC0_1, PTX5_FN), + PINMUX_DATA(MDIO_MARK, PSC1_1, PTX4_FN), + PINMUX_DATA(MDC_MARK, PSC2_1, PTX3_FN), + + PINMUX_DATA(MMC_CLK_MARK, PTX1_FN), + PINMUX_DATA(MMC_CMD_MARK, PTX0_FN), + + /* PTY FN */ + PINMUX_DATA(SDHI0CD_MARK, PTY7_FN), + PINMUX_DATA(SDHI0WP_MARK, PTY6_FN), + PINMUX_DATA(SDHI0D3_MARK, PTY5_FN), + PINMUX_DATA(SDHI0D2_MARK, PTY4_FN), + PINMUX_DATA(SDHI0D1_MARK, PTY3_FN), + PINMUX_DATA(SDHI0D0_MARK, PTY2_FN), + PINMUX_DATA(SDHI0CMD_MARK, PTY1_FN), + PINMUX_DATA(SDHI0CLK_MARK, PTY0_FN), + + /* PTZ FN */ + PINMUX_DATA(INTC_IRQ7_MARK, PSB10_0, PTZ7_FN), + PINMUX_DATA(INTC_IRQ6_MARK, PSB11_0, PTZ6_FN), + PINMUX_DATA(INTC_IRQ5_MARK, PSB12_0, PTZ5_FN), + PINMUX_DATA(INTC_IRQ4_MARK, PSB13_0, PTZ4_FN), + PINMUX_DATA(INTC_IRQ3_MARK, PSB14_0, PTZ3_FN), + PINMUX_DATA(INTC_IRQ2_MARK, PTZ2_FN), + PINMUX_DATA(INTC_IRQ1_MARK, PTZ1_FN), + PINMUX_DATA(INTC_IRQ0_MARK, PTZ0_FN), + + PINMUX_DATA(SCIF3_I_CTS_MARK, PSB10_1, PTZ7_FN), + PINMUX_DATA(SCIF3_I_RTS_MARK, PSB11_1, PTZ6_FN), + PINMUX_DATA(SCIF3_I_SCK_MARK, PSB12_1, PTZ5_FN), + PINMUX_DATA(SCIF3_I_RXD_MARK, PSB13_1, PTZ4_FN), + PINMUX_DATA(SCIF3_I_TXD_MARK, PSB14_1, PTZ3_FN), +}; + +static struct pinmux_gpio pinmux_gpios[] = { + /* PTA */ + PINMUX_GPIO(GPIO_PTA7, PTA7_DATA), + PINMUX_GPIO(GPIO_PTA6, PTA6_DATA), + PINMUX_GPIO(GPIO_PTA5, PTA5_DATA), + PINMUX_GPIO(GPIO_PTA4, PTA4_DATA), + PINMUX_GPIO(GPIO_PTA3, PTA3_DATA), + PINMUX_GPIO(GPIO_PTA2, PTA2_DATA), + PINMUX_GPIO(GPIO_PTA1, PTA1_DATA), + PINMUX_GPIO(GPIO_PTA0, PTA0_DATA), + + /* PTB */ + PINMUX_GPIO(GPIO_PTB7, PTB7_DATA), + PINMUX_GPIO(GPIO_PTB6, PTB6_DATA), + PINMUX_GPIO(GPIO_PTB5, PTB5_DATA), + PINMUX_GPIO(GPIO_PTB4, PTB4_DATA), + PINMUX_GPIO(GPIO_PTB3, PTB3_DATA), + PINMUX_GPIO(GPIO_PTB2, PTB2_DATA), + PINMUX_GPIO(GPIO_PTB1, PTB1_DATA), + PINMUX_GPIO(GPIO_PTB0, PTB0_DATA), + + /* PTC */ + PINMUX_GPIO(GPIO_PTC7, PTC7_DATA), + PINMUX_GPIO(GPIO_PTC6, PTC6_DATA), + PINMUX_GPIO(GPIO_PTC5, PTC5_DATA), + PINMUX_GPIO(GPIO_PTC4, PTC4_DATA), + PINMUX_GPIO(GPIO_PTC3, PTC3_DATA), + PINMUX_GPIO(GPIO_PTC2, PTC2_DATA), + PINMUX_GPIO(GPIO_PTC1, PTC1_DATA), + PINMUX_GPIO(GPIO_PTC0, PTC0_DATA), + + /* PTD */ + PINMUX_GPIO(GPIO_PTD7, PTD7_DATA), + PINMUX_GPIO(GPIO_PTD6, PTD6_DATA), + PINMUX_GPIO(GPIO_PTD5, PTD5_DATA), + PINMUX_GPIO(GPIO_PTD4, PTD4_DATA), + PINMUX_GPIO(GPIO_PTD3, PTD3_DATA), + PINMUX_GPIO(GPIO_PTD2, PTD2_DATA), + PINMUX_GPIO(GPIO_PTD1, PTD1_DATA), + PINMUX_GPIO(GPIO_PTD0, PTD0_DATA), + + /* PTE */ + PINMUX_GPIO(GPIO_PTE7, PTE7_DATA), + PINMUX_GPIO(GPIO_PTE6, PTE6_DATA), + PINMUX_GPIO(GPIO_PTE5, PTE5_DATA), + PINMUX_GPIO(GPIO_PTE4, PTE4_DATA), + PINMUX_GPIO(GPIO_PTE3, PTE3_DATA), + PINMUX_GPIO(GPIO_PTE2, PTE2_DATA), + PINMUX_GPIO(GPIO_PTE1, PTE1_DATA), + PINMUX_GPIO(GPIO_PTE0, PTE0_DATA), + + /* PTF */ + PINMUX_GPIO(GPIO_PTF7, PTF7_DATA), + PINMUX_GPIO(GPIO_PTF6, PTF6_DATA), + PINMUX_GPIO(GPIO_PTF5, PTF5_DATA), + PINMUX_GPIO(GPIO_PTF4, PTF4_DATA), + PINMUX_GPIO(GPIO_PTF3, PTF3_DATA), + PINMUX_GPIO(GPIO_PTF2, PTF2_DATA), + PINMUX_GPIO(GPIO_PTF1, PTF1_DATA), + PINMUX_GPIO(GPIO_PTF0, PTF0_DATA), + + /* PTG */ + PINMUX_GPIO(GPIO_PTG5, PTG5_DATA), + PINMUX_GPIO(GPIO_PTG4, PTG4_DATA), + PINMUX_GPIO(GPIO_PTG3, PTG3_DATA), + PINMUX_GPIO(GPIO_PTG2, PTG2_DATA), + PINMUX_GPIO(GPIO_PTG1, PTG1_DATA), + PINMUX_GPIO(GPIO_PTG0, PTG0_DATA), + + /* PTH */ + PINMUX_GPIO(GPIO_PTH7, PTH7_DATA), + PINMUX_GPIO(GPIO_PTH6, PTH6_DATA), + PINMUX_GPIO(GPIO_PTH5, PTH5_DATA), + PINMUX_GPIO(GPIO_PTH4, PTH4_DATA), + PINMUX_GPIO(GPIO_PTH3, PTH3_DATA), + PINMUX_GPIO(GPIO_PTH2, PTH2_DATA), + PINMUX_GPIO(GPIO_PTH1, PTH1_DATA), + PINMUX_GPIO(GPIO_PTH0, PTH0_DATA), + + /* PTJ */ + PINMUX_GPIO(GPIO_PTJ7, PTJ7_DATA), + PINMUX_GPIO(GPIO_PTJ6, PTJ6_DATA), + PINMUX_GPIO(GPIO_PTJ5, PTJ5_DATA), + PINMUX_GPIO(GPIO_PTJ3, PTJ3_DATA), + PINMUX_GPIO(GPIO_PTJ2, PTJ2_DATA), + PINMUX_GPIO(GPIO_PTJ1, PTJ1_DATA), + PINMUX_GPIO(GPIO_PTJ0, PTJ0_DATA), + + /* PTK */ + PINMUX_GPIO(GPIO_PTK7, PTK7_DATA), + PINMUX_GPIO(GPIO_PTK6, PTK6_DATA), + PINMUX_GPIO(GPIO_PTK5, PTK5_DATA), + PINMUX_GPIO(GPIO_PTK4, PTK4_DATA), + PINMUX_GPIO(GPIO_PTK3, PTK3_DATA), + PINMUX_GPIO(GPIO_PTK2, PTK2_DATA), + PINMUX_GPIO(GPIO_PTK1, PTK1_DATA), + PINMUX_GPIO(GPIO_PTK0, PTK0_DATA), + + /* PTL */ + PINMUX_GPIO(GPIO_PTL7, PTL7_DATA), + PINMUX_GPIO(GPIO_PTL6, PTL6_DATA), + PINMUX_GPIO(GPIO_PTL5, PTL5_DATA), + PINMUX_GPIO(GPIO_PTL4, PTL4_DATA), + PINMUX_GPIO(GPIO_PTL3, PTL3_DATA), + PINMUX_GPIO(GPIO_PTL2, PTL2_DATA), + PINMUX_GPIO(GPIO_PTL1, PTL1_DATA), + PINMUX_GPIO(GPIO_PTL0, PTL0_DATA), + + /* PTM */ + PINMUX_GPIO(GPIO_PTM7, PTM7_DATA), + PINMUX_GPIO(GPIO_PTM6, PTM6_DATA), + PINMUX_GPIO(GPIO_PTM5, PTM5_DATA), + PINMUX_GPIO(GPIO_PTM4, PTM4_DATA), + PINMUX_GPIO(GPIO_PTM3, PTM3_DATA), + PINMUX_GPIO(GPIO_PTM2, PTM2_DATA), + PINMUX_GPIO(GPIO_PTM1, PTM1_DATA), + PINMUX_GPIO(GPIO_PTM0, PTM0_DATA), + + /* PTN */ + PINMUX_GPIO(GPIO_PTN7, PTN7_DATA), + PINMUX_GPIO(GPIO_PTN6, PTN6_DATA), + PINMUX_GPIO(GPIO_PTN5, PTN5_DATA), + PINMUX_GPIO(GPIO_PTN4, PTN4_DATA), + PINMUX_GPIO(GPIO_PTN3, PTN3_DATA), + PINMUX_GPIO(GPIO_PTN2, PTN2_DATA), + PINMUX_GPIO(GPIO_PTN1, PTN1_DATA), + PINMUX_GPIO(GPIO_PTN0, PTN0_DATA), + + /* PTQ */ + PINMUX_GPIO(GPIO_PTQ7, PTQ7_DATA), + PINMUX_GPIO(GPIO_PTQ6, PTQ6_DATA), + PINMUX_GPIO(GPIO_PTQ5, PTQ5_DATA), + PINMUX_GPIO(GPIO_PTQ4, PTQ4_DATA), + PINMUX_GPIO(GPIO_PTQ3, PTQ3_DATA), + PINMUX_GPIO(GPIO_PTQ2, PTQ2_DATA), + PINMUX_GPIO(GPIO_PTQ1, PTQ1_DATA), + PINMUX_GPIO(GPIO_PTQ0, PTQ0_DATA), + + /* PTR */ + PINMUX_GPIO(GPIO_PTR7, PTR7_DATA), + PINMUX_GPIO(GPIO_PTR6, PTR6_DATA), + PINMUX_GPIO(GPIO_PTR5, PTR5_DATA), + PINMUX_GPIO(GPIO_PTR4, PTR4_DATA), + PINMUX_GPIO(GPIO_PTR3, PTR3_DATA), + PINMUX_GPIO(GPIO_PTR2, PTR2_DATA), + PINMUX_GPIO(GPIO_PTR1, PTR1_DATA), + PINMUX_GPIO(GPIO_PTR0, PTR0_DATA), + + /* PTS */ + PINMUX_GPIO(GPIO_PTS6, PTS6_DATA), + PINMUX_GPIO(GPIO_PTS5, PTS5_DATA), + PINMUX_GPIO(GPIO_PTS4, PTS4_DATA), + PINMUX_GPIO(GPIO_PTS3, PTS3_DATA), + PINMUX_GPIO(GPIO_PTS2, PTS2_DATA), + PINMUX_GPIO(GPIO_PTS1, PTS1_DATA), + PINMUX_GPIO(GPIO_PTS0, PTS0_DATA), + + /* PTT */ + PINMUX_GPIO(GPIO_PTT7, PTT7_DATA), + PINMUX_GPIO(GPIO_PTT6, PTT6_DATA), + PINMUX_GPIO(GPIO_PTT5, PTT5_DATA), + PINMUX_GPIO(GPIO_PTT4, PTT4_DATA), + PINMUX_GPIO(GPIO_PTT3, PTT3_DATA), + PINMUX_GPIO(GPIO_PTT2, PTT2_DATA), + PINMUX_GPIO(GPIO_PTT1, PTT1_DATA), + PINMUX_GPIO(GPIO_PTT0, PTT0_DATA), + + /* PTU */ + PINMUX_GPIO(GPIO_PTU7, PTU7_DATA), + PINMUX_GPIO(GPIO_PTU6, PTU6_DATA), + PINMUX_GPIO(GPIO_PTU5, PTU5_DATA), + PINMUX_GPIO(GPIO_PTU4, PTU4_DATA), + PINMUX_GPIO(GPIO_PTU3, PTU3_DATA), + PINMUX_GPIO(GPIO_PTU2, PTU2_DATA), + PINMUX_GPIO(GPIO_PTU1, PTU1_DATA), + PINMUX_GPIO(GPIO_PTU0, PTU0_DATA), + + /* PTV */ + PINMUX_GPIO(GPIO_PTV7, PTV7_DATA), + PINMUX_GPIO(GPIO_PTV6, PTV6_DATA), + PINMUX_GPIO(GPIO_PTV5, PTV5_DATA), + PINMUX_GPIO(GPIO_PTV4, PTV4_DATA), + PINMUX_GPIO(GPIO_PTV3, PTV3_DATA), + PINMUX_GPIO(GPIO_PTV2, PTV2_DATA), + PINMUX_GPIO(GPIO_PTV1, PTV1_DATA), + PINMUX_GPIO(GPIO_PTV0, PTV0_DATA), + + /* PTW */ + PINMUX_GPIO(GPIO_PTW7, PTW7_DATA), + PINMUX_GPIO(GPIO_PTW6, PTW6_DATA), + PINMUX_GPIO(GPIO_PTW5, PTW5_DATA), + PINMUX_GPIO(GPIO_PTW4, PTW4_DATA), + PINMUX_GPIO(GPIO_PTW3, PTW3_DATA), + PINMUX_GPIO(GPIO_PTW2, PTW2_DATA), + PINMUX_GPIO(GPIO_PTW1, PTW1_DATA), + PINMUX_GPIO(GPIO_PTW0, PTW0_DATA), + + /* PTX */ + PINMUX_GPIO(GPIO_PTX7, PTX7_DATA), + PINMUX_GPIO(GPIO_PTX6, PTX6_DATA), + PINMUX_GPIO(GPIO_PTX5, PTX5_DATA), + PINMUX_GPIO(GPIO_PTX4, PTX4_DATA), + PINMUX_GPIO(GPIO_PTX3, PTX3_DATA), + PINMUX_GPIO(GPIO_PTX2, PTX2_DATA), + PINMUX_GPIO(GPIO_PTX1, PTX1_DATA), + PINMUX_GPIO(GPIO_PTX0, PTX0_DATA), + + /* PTY */ + PINMUX_GPIO(GPIO_PTY7, PTY7_DATA), + PINMUX_GPIO(GPIO_PTY6, PTY6_DATA), + PINMUX_GPIO(GPIO_PTY5, PTY5_DATA), + PINMUX_GPIO(GPIO_PTY4, PTY4_DATA), + PINMUX_GPIO(GPIO_PTY3, PTY3_DATA), + PINMUX_GPIO(GPIO_PTY2, PTY2_DATA), + PINMUX_GPIO(GPIO_PTY1, PTY1_DATA), + PINMUX_GPIO(GPIO_PTY0, PTY0_DATA), + + /* PTZ */ + PINMUX_GPIO(GPIO_PTZ7, PTZ7_DATA), + PINMUX_GPIO(GPIO_PTZ6, PTZ6_DATA), + PINMUX_GPIO(GPIO_PTZ5, PTZ5_DATA), + PINMUX_GPIO(GPIO_PTZ4, PTZ4_DATA), + PINMUX_GPIO(GPIO_PTZ3, PTZ3_DATA), + PINMUX_GPIO(GPIO_PTZ2, PTZ2_DATA), + PINMUX_GPIO(GPIO_PTZ1, PTZ1_DATA), + PINMUX_GPIO(GPIO_PTZ0, PTZ0_DATA), + + /* BSC */ + PINMUX_GPIO(GPIO_FN_D31, D31_MARK), + PINMUX_GPIO(GPIO_FN_D30, D30_MARK), + PINMUX_GPIO(GPIO_FN_D29, D29_MARK), + PINMUX_GPIO(GPIO_FN_D28, D28_MARK), + PINMUX_GPIO(GPIO_FN_D27, D27_MARK), + PINMUX_GPIO(GPIO_FN_D26, D26_MARK), + PINMUX_GPIO(GPIO_FN_D25, D25_MARK), + PINMUX_GPIO(GPIO_FN_D24, D24_MARK), + PINMUX_GPIO(GPIO_FN_D23, D23_MARK), + PINMUX_GPIO(GPIO_FN_D22, D22_MARK), + PINMUX_GPIO(GPIO_FN_D21, D21_MARK), + PINMUX_GPIO(GPIO_FN_D20, D20_MARK), + PINMUX_GPIO(GPIO_FN_D19, D19_MARK), + PINMUX_GPIO(GPIO_FN_D18, D18_MARK), + PINMUX_GPIO(GPIO_FN_D17, D17_MARK), + PINMUX_GPIO(GPIO_FN_D16, D16_MARK), + PINMUX_GPIO(GPIO_FN_D15, D15_MARK), + PINMUX_GPIO(GPIO_FN_D14, D14_MARK), + PINMUX_GPIO(GPIO_FN_D13, D13_MARK), + PINMUX_GPIO(GPIO_FN_D12, D12_MARK), + PINMUX_GPIO(GPIO_FN_D11, D11_MARK), + PINMUX_GPIO(GPIO_FN_D10, D10_MARK), + PINMUX_GPIO(GPIO_FN_D9, D9_MARK), + PINMUX_GPIO(GPIO_FN_D8, D8_MARK), + PINMUX_GPIO(GPIO_FN_D7, D7_MARK), + PINMUX_GPIO(GPIO_FN_D6, D6_MARK), + PINMUX_GPIO(GPIO_FN_D5, D5_MARK), + PINMUX_GPIO(GPIO_FN_D4, D4_MARK), + PINMUX_GPIO(GPIO_FN_D3, D3_MARK), + PINMUX_GPIO(GPIO_FN_D2, D2_MARK), + PINMUX_GPIO(GPIO_FN_D1, D1_MARK), + PINMUX_GPIO(GPIO_FN_D0, D0_MARK), + PINMUX_GPIO(GPIO_FN_A25, A25_MARK), + PINMUX_GPIO(GPIO_FN_A24, A24_MARK), + PINMUX_GPIO(GPIO_FN_A23, A23_MARK), + PINMUX_GPIO(GPIO_FN_A22, A22_MARK), + PINMUX_GPIO(GPIO_FN_CS6B_CE1B, CS6B_CE1B_MARK), + PINMUX_GPIO(GPIO_FN_CS6A_CE2B, CS6A_CE2B_MARK), + PINMUX_GPIO(GPIO_FN_CS5B_CE1A, CS5B_CE1A_MARK), + PINMUX_GPIO(GPIO_FN_CS5A_CE2A, CS5A_CE2A_MARK), + PINMUX_GPIO(GPIO_FN_WE3_ICIOWR, WE3_ICIOWR_MARK), + PINMUX_GPIO(GPIO_FN_WE2_ICIORD, WE2_ICIORD_MARK), + PINMUX_GPIO(GPIO_FN_IOIS16, IOIS16_MARK), + PINMUX_GPIO(GPIO_FN_WAIT, WAIT_MARK), + PINMUX_GPIO(GPIO_FN_BS, BS_MARK), + + /* KEYSC */ + PINMUX_GPIO(GPIO_FN_KEYOUT5_IN5, KEYOUT5_IN5_MARK), + PINMUX_GPIO(GPIO_FN_KEYOUT4_IN6, KEYOUT4_IN6_MARK), + PINMUX_GPIO(GPIO_FN_KEYIN4, KEYIN4_MARK), + PINMUX_GPIO(GPIO_FN_KEYIN3, KEYIN3_MARK), + PINMUX_GPIO(GPIO_FN_KEYIN2, KEYIN2_MARK), + PINMUX_GPIO(GPIO_FN_KEYIN1, KEYIN1_MARK), + PINMUX_GPIO(GPIO_FN_KEYIN0, KEYIN0_MARK), + PINMUX_GPIO(GPIO_FN_KEYOUT3, KEYOUT3_MARK), + PINMUX_GPIO(GPIO_FN_KEYOUT2, KEYOUT2_MARK), + PINMUX_GPIO(GPIO_FN_KEYOUT1, KEYOUT1_MARK), + PINMUX_GPIO(GPIO_FN_KEYOUT0, KEYOUT0_MARK), + + /* ATAPI */ + PINMUX_GPIO(GPIO_FN_IDED15, IDED15_MARK), + PINMUX_GPIO(GPIO_FN_IDED14, IDED14_MARK), + PINMUX_GPIO(GPIO_FN_IDED13, IDED13_MARK), + PINMUX_GPIO(GPIO_FN_IDED12, IDED12_MARK), + PINMUX_GPIO(GPIO_FN_IDED11, IDED11_MARK), + PINMUX_GPIO(GPIO_FN_IDED10, IDED10_MARK), + PINMUX_GPIO(GPIO_FN_IDED9, IDED9_MARK), + PINMUX_GPIO(GPIO_FN_IDED8, IDED8_MARK), + PINMUX_GPIO(GPIO_FN_IDED7, IDED7_MARK), + PINMUX_GPIO(GPIO_FN_IDED6, IDED6_MARK), + PINMUX_GPIO(GPIO_FN_IDED5, IDED5_MARK), + PINMUX_GPIO(GPIO_FN_IDED4, IDED4_MARK), + PINMUX_GPIO(GPIO_FN_IDED3, IDED3_MARK), + PINMUX_GPIO(GPIO_FN_IDED2, IDED2_MARK), + PINMUX_GPIO(GPIO_FN_IDED1, IDED1_MARK), + PINMUX_GPIO(GPIO_FN_IDED0, IDED0_MARK), + PINMUX_GPIO(GPIO_FN_IDEA2, IDEA2_MARK), + PINMUX_GPIO(GPIO_FN_IDEA1, IDEA1_MARK), + PINMUX_GPIO(GPIO_FN_IDEA0, IDEA0_MARK), + PINMUX_GPIO(GPIO_FN_IDEIOWR, IDEIOWR_MARK), + PINMUX_GPIO(GPIO_FN_IODREQ, IODREQ_MARK), + PINMUX_GPIO(GPIO_FN_IDECS0, IDECS0_MARK), + PINMUX_GPIO(GPIO_FN_IDECS1, IDECS1_MARK), + PINMUX_GPIO(GPIO_FN_IDEIORD, IDEIORD_MARK), + PINMUX_GPIO(GPIO_FN_DIRECTION, DIRECTION_MARK), + PINMUX_GPIO(GPIO_FN_EXBUF_ENB, EXBUF_ENB_MARK), + PINMUX_GPIO(GPIO_FN_IDERST, IDERST_MARK), + PINMUX_GPIO(GPIO_FN_IODACK, IODACK_MARK), + PINMUX_GPIO(GPIO_FN_IDEINT, IDEINT_MARK), + PINMUX_GPIO(GPIO_FN_IDEIORDY, IDEIORDY_MARK), + + /* TPU */ + PINMUX_GPIO(GPIO_FN_TPUTO3, TPUTO3_MARK), + PINMUX_GPIO(GPIO_FN_TPUTO2, TPUTO2_MARK), + PINMUX_GPIO(GPIO_FN_TPUTO1, TPUTO1_MARK), + PINMUX_GPIO(GPIO_FN_TPUTO0, TPUTO0_MARK), + PINMUX_GPIO(GPIO_FN_TPUTI3, TPUTI3_MARK), + PINMUX_GPIO(GPIO_FN_TPUTI2, TPUTI2_MARK), + + /* LCDC */ + PINMUX_GPIO(GPIO_FN_LCDD23, LCDD23_MARK), + PINMUX_GPIO(GPIO_FN_LCDD22, LCDD22_MARK), + PINMUX_GPIO(GPIO_FN_LCDD21, LCDD21_MARK), + PINMUX_GPIO(GPIO_FN_LCDD20, LCDD20_MARK), + PINMUX_GPIO(GPIO_FN_LCDD19, LCDD19_MARK), + PINMUX_GPIO(GPIO_FN_LCDD18, LCDD18_MARK), + PINMUX_GPIO(GPIO_FN_LCDD17, LCDD17_MARK), + PINMUX_GPIO(GPIO_FN_LCDD16, LCDD16_MARK), + PINMUX_GPIO(GPIO_FN_LCDD15, LCDD15_MARK), + PINMUX_GPIO(GPIO_FN_LCDD14, LCDD14_MARK), + PINMUX_GPIO(GPIO_FN_LCDD13, LCDD13_MARK), + PINMUX_GPIO(GPIO_FN_LCDD12, LCDD12_MARK), + PINMUX_GPIO(GPIO_FN_LCDD11, LCDD11_MARK), + PINMUX_GPIO(GPIO_FN_LCDD10, LCDD10_MARK), + PINMUX_GPIO(GPIO_FN_LCDD9, LCDD9_MARK), + PINMUX_GPIO(GPIO_FN_LCDD8, LCDD8_MARK), + PINMUX_GPIO(GPIO_FN_LCDD7, LCDD7_MARK), + PINMUX_GPIO(GPIO_FN_LCDD6, LCDD6_MARK), + PINMUX_GPIO(GPIO_FN_LCDD5, LCDD5_MARK), + PINMUX_GPIO(GPIO_FN_LCDD4, LCDD4_MARK), + PINMUX_GPIO(GPIO_FN_LCDD3, LCDD3_MARK), + PINMUX_GPIO(GPIO_FN_LCDD2, LCDD2_MARK), + PINMUX_GPIO(GPIO_FN_LCDD1, LCDD1_MARK), + PINMUX_GPIO(GPIO_FN_LCDD0, LCDD0_MARK), + PINMUX_GPIO(GPIO_FN_LCDVSYN, LCDVSYN_MARK), + PINMUX_GPIO(GPIO_FN_LCDDISP, LCDDISP_MARK), + PINMUX_GPIO(GPIO_FN_LCDRS, LCDRS_MARK), + PINMUX_GPIO(GPIO_FN_LCDHSYN, LCDHSYN_MARK), + PINMUX_GPIO(GPIO_FN_LCDCS, LCDCS_MARK), + PINMUX_GPIO(GPIO_FN_LCDDON, LCDDON_MARK), + PINMUX_GPIO(GPIO_FN_LCDDCK, LCDDCK_MARK), + PINMUX_GPIO(GPIO_FN_LCDWR, LCDWR_MARK), + PINMUX_GPIO(GPIO_FN_LCDVEPWC, LCDVEPWC_MARK), + PINMUX_GPIO(GPIO_FN_LCDVCPWC, LCDVCPWC_MARK), + PINMUX_GPIO(GPIO_FN_LCDRD, LCDRD_MARK), + PINMUX_GPIO(GPIO_FN_LCDLCLK, LCDLCLK_MARK), + + /* SCIF0 */ + PINMUX_GPIO(GPIO_FN_SCIF0_TXD, SCIF0_TXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF0_RXD, SCIF0_RXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF0_SCK, SCIF0_SCK_MARK), + + /* SCIF1 */ + PINMUX_GPIO(GPIO_FN_SCIF1_SCK, SCIF1_SCK_MARK), + PINMUX_GPIO(GPIO_FN_SCIF1_RXD, SCIF1_RXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF1_TXD, SCIF1_TXD_MARK), + + /* SCIF2 */ + PINMUX_GPIO(GPIO_FN_SCIF2_L_TXD, SCIF2_L_TXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF2_L_SCK, SCIF2_L_SCK_MARK), + PINMUX_GPIO(GPIO_FN_SCIF2_L_RXD, SCIF2_L_RXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF2_V_TXD, SCIF2_V_TXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF2_V_SCK, SCIF2_V_SCK_MARK), + PINMUX_GPIO(GPIO_FN_SCIF2_V_RXD, SCIF2_V_RXD_MARK), + + /* SCIF3 */ + PINMUX_GPIO(GPIO_FN_SCIF3_V_SCK, SCIF3_V_SCK_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_V_RXD, SCIF3_V_RXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_V_TXD, SCIF3_V_TXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_V_CTS, SCIF3_V_CTS_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_V_RTS, SCIF3_V_RTS_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_I_SCK, SCIF3_I_SCK_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_I_RXD, SCIF3_I_RXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_I_TXD, SCIF3_I_TXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_I_CTS, SCIF3_I_CTS_MARK), + PINMUX_GPIO(GPIO_FN_SCIF3_I_RTS, SCIF3_I_RTS_MARK), + + /* SCIF4 */ + PINMUX_GPIO(GPIO_FN_SCIF4_SCK, SCIF4_SCK_MARK), + PINMUX_GPIO(GPIO_FN_SCIF4_RXD, SCIF4_RXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF4_TXD, SCIF4_TXD_MARK), + + /* SCIF5 */ + PINMUX_GPIO(GPIO_FN_SCIF5_SCK, SCIF5_SCK_MARK), + PINMUX_GPIO(GPIO_FN_SCIF5_RXD, SCIF5_RXD_MARK), + PINMUX_GPIO(GPIO_FN_SCIF5_TXD, SCIF5_TXD_MARK), + + /* FSI */ + PINMUX_GPIO(GPIO_FN_FSIMCKB, FSIMCKB_MARK), + PINMUX_GPIO(GPIO_FN_FSIMCKA, FSIMCKA_MARK), + PINMUX_GPIO(GPIO_FN_FSIOASD, FSIOASD_MARK), + PINMUX_GPIO(GPIO_FN_FSIIABCK, FSIIABCK_MARK), + PINMUX_GPIO(GPIO_FN_FSIIALRCK, FSIIALRCK_MARK), + PINMUX_GPIO(GPIO_FN_FSIOABCK, FSIOABCK_MARK), + PINMUX_GPIO(GPIO_FN_FSIOALRCK, FSIOALRCK_MARK), + PINMUX_GPIO(GPIO_FN_CLKAUDIOAO, CLKAUDIOAO_MARK), + PINMUX_GPIO(GPIO_FN_FSIIBSD, FSIIBSD_MARK), + PINMUX_GPIO(GPIO_FN_FSIOBSD, FSIOBSD_MARK), + PINMUX_GPIO(GPIO_FN_FSIIBBCK, FSIIBBCK_MARK), + PINMUX_GPIO(GPIO_FN_FSIIBLRCK, FSIIBLRCK_MARK), + PINMUX_GPIO(GPIO_FN_FSIOBBCK, FSIOBBCK_MARK), + PINMUX_GPIO(GPIO_FN_FSIOBLRCK, FSIOBLRCK_MARK), + PINMUX_GPIO(GPIO_FN_CLKAUDIOBO, CLKAUDIOBO_MARK), + PINMUX_GPIO(GPIO_FN_FSIIASD, FSIIASD_MARK), + + /* AUD */ + PINMUX_GPIO(GPIO_FN_AUDCK, AUDCK_MARK), + PINMUX_GPIO(GPIO_FN_AUDSYNC, AUDSYNC_MARK), + PINMUX_GPIO(GPIO_FN_AUDATA3, AUDATA3_MARK), + PINMUX_GPIO(GPIO_FN_AUDATA2, AUDATA2_MARK), + PINMUX_GPIO(GPIO_FN_AUDATA1, AUDATA1_MARK), + PINMUX_GPIO(GPIO_FN_AUDATA0, AUDATA0_MARK), + + /* VIO */ + PINMUX_GPIO(GPIO_FN_VIO_CKO, VIO_CKO_MARK), + + /* VIO0 */ + PINMUX_GPIO(GPIO_FN_VIO0_D15, VIO0_D15_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D14, VIO0_D14_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D13, VIO0_D13_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D12, VIO0_D12_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D11, VIO0_D11_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D10, VIO0_D10_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D9, VIO0_D9_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D8, VIO0_D8_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D7, VIO0_D7_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D6, VIO0_D6_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D5, VIO0_D5_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D4, VIO0_D4_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D3, VIO0_D3_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D2, VIO0_D2_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D1, VIO0_D1_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_D0, VIO0_D0_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_VD, VIO0_VD_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_CLK, VIO0_CLK_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_FLD, VIO0_FLD_MARK), + PINMUX_GPIO(GPIO_FN_VIO0_HD, VIO0_HD_MARK), + + /* VIO1 */ + PINMUX_GPIO(GPIO_FN_VIO1_D7, VIO1_D7_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_D6, VIO1_D6_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_D5, VIO1_D5_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_D4, VIO1_D4_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_D3, VIO1_D3_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_D2, VIO1_D2_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_D1, VIO1_D1_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_D0, VIO1_D0_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_FLD, VIO1_FLD_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_HD, VIO1_HD_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_VD, VIO1_VD_MARK), + PINMUX_GPIO(GPIO_FN_VIO1_CLK, VIO1_CLK_MARK), + + /* Eth */ + PINMUX_GPIO(GPIO_FN_RMII_RXD0, RMII_RXD0_MARK), + PINMUX_GPIO(GPIO_FN_RMII_RXD1, RMII_RXD1_MARK), + PINMUX_GPIO(GPIO_FN_RMII_TXD0, RMII_TXD0_MARK), + PINMUX_GPIO(GPIO_FN_RMII_TXD1, RMII_TXD1_MARK), + PINMUX_GPIO(GPIO_FN_RMII_REF_CLK, RMII_REF_CLK_MARK), + PINMUX_GPIO(GPIO_FN_RMII_TX_EN, RMII_TX_EN_MARK), + PINMUX_GPIO(GPIO_FN_RMII_RX_ER, RMII_RX_ER_MARK), + PINMUX_GPIO(GPIO_FN_RMII_CRS_DV, RMII_CRS_DV_MARK), + PINMUX_GPIO(GPIO_FN_LNKSTA, LNKSTA_MARK), + PINMUX_GPIO(GPIO_FN_MDIO, MDIO_MARK), + PINMUX_GPIO(GPIO_FN_MDC, MDC_MARK), + + /* System */ + PINMUX_GPIO(GPIO_FN_PDSTATUS, PDSTATUS_MARK), + PINMUX_GPIO(GPIO_FN_STATUS2, STATUS2_MARK), + PINMUX_GPIO(GPIO_FN_STATUS0, STATUS0_MARK), + + /* VOU */ + PINMUX_GPIO(GPIO_FN_DV_D15, DV_D15_MARK), + PINMUX_GPIO(GPIO_FN_DV_D14, DV_D14_MARK), + PINMUX_GPIO(GPIO_FN_DV_D13, DV_D13_MARK), + PINMUX_GPIO(GPIO_FN_DV_D12, DV_D12_MARK), + PINMUX_GPIO(GPIO_FN_DV_D11, DV_D11_MARK), + PINMUX_GPIO(GPIO_FN_DV_D10, DV_D10_MARK), + PINMUX_GPIO(GPIO_FN_DV_D9, DV_D9_MARK), + PINMUX_GPIO(GPIO_FN_DV_D8, DV_D8_MARK), + PINMUX_GPIO(GPIO_FN_DV_D7, DV_D7_MARK), + PINMUX_GPIO(GPIO_FN_DV_D6, DV_D6_MARK), + PINMUX_GPIO(GPIO_FN_DV_D5, DV_D5_MARK), + PINMUX_GPIO(GPIO_FN_DV_D4, DV_D4_MARK), + PINMUX_GPIO(GPIO_FN_DV_D3, DV_D3_MARK), + PINMUX_GPIO(GPIO_FN_DV_D2, DV_D2_MARK), + PINMUX_GPIO(GPIO_FN_DV_D1, DV_D1_MARK), + PINMUX_GPIO(GPIO_FN_DV_D0, DV_D0_MARK), + PINMUX_GPIO(GPIO_FN_DV_CLKI, DV_CLKI_MARK), + PINMUX_GPIO(GPIO_FN_DV_CLK, DV_CLK_MARK), + PINMUX_GPIO(GPIO_FN_DV_VSYNC, DV_VSYNC_MARK), + PINMUX_GPIO(GPIO_FN_DV_HSYNC, DV_HSYNC_MARK), + + /* MSIOF0 */ + PINMUX_GPIO(GPIO_FN_MSIOF0_RXD, MSIOF0_RXD_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_TXD, MSIOF0_TXD_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_MCK, MSIOF0_MCK_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_TSCK, MSIOF0_TSCK_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_SS1, MSIOF0_SS1_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_SS2, MSIOF0_SS2_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_TSYNC, MSIOF0_TSYNC_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_RSCK, MSIOF0_RSCK_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF0_RSYNC, MSIOF0_RSYNC_MARK), + + /* MSIOF1 */ + PINMUX_GPIO(GPIO_FN_MSIOF1_RXD, MSIOF1_RXD_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_TXD, MSIOF1_TXD_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_MCK, MSIOF1_MCK_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_TSCK, MSIOF1_TSCK_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_SS1, MSIOF1_SS1_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_SS2, MSIOF1_SS2_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_TSYNC, MSIOF1_TSYNC_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_RSCK, MSIOF1_RSCK_MARK), + PINMUX_GPIO(GPIO_FN_MSIOF1_RSYNC, MSIOF1_RSYNC_MARK), + + /* DMAC */ + PINMUX_GPIO(GPIO_FN_DMAC_DACK0, DMAC_DACK0_MARK), + PINMUX_GPIO(GPIO_FN_DMAC_DREQ0, DMAC_DREQ0_MARK), + PINMUX_GPIO(GPIO_FN_DMAC_DACK1, DMAC_DACK1_MARK), + PINMUX_GPIO(GPIO_FN_DMAC_DREQ1, DMAC_DREQ1_MARK), + + /* SDHI0 */ + PINMUX_GPIO(GPIO_FN_SDHI0CD, SDHI0CD_MARK), + PINMUX_GPIO(GPIO_FN_SDHI0WP, SDHI0WP_MARK), + PINMUX_GPIO(GPIO_FN_SDHI0CMD, SDHI0CMD_MARK), + PINMUX_GPIO(GPIO_FN_SDHI0CLK, SDHI0CLK_MARK), + PINMUX_GPIO(GPIO_FN_SDHI0D3, SDHI0D3_MARK), + PINMUX_GPIO(GPIO_FN_SDHI0D2, SDHI0D2_MARK), + PINMUX_GPIO(GPIO_FN_SDHI0D1, SDHI0D1_MARK), + PINMUX_GPIO(GPIO_FN_SDHI0D0, SDHI0D0_MARK), + + /* SDHI1 */ + PINMUX_GPIO(GPIO_FN_SDHI1CD, SDHI1CD_MARK), + PINMUX_GPIO(GPIO_FN_SDHI1WP, SDHI1WP_MARK), + PINMUX_GPIO(GPIO_FN_SDHI1CMD, SDHI1CMD_MARK), + PINMUX_GPIO(GPIO_FN_SDHI1CLK, SDHI1CLK_MARK), + PINMUX_GPIO(GPIO_FN_SDHI1D3, SDHI1D3_MARK), + PINMUX_GPIO(GPIO_FN_SDHI1D2, SDHI1D2_MARK), + PINMUX_GPIO(GPIO_FN_SDHI1D1, SDHI1D1_MARK), + PINMUX_GPIO(GPIO_FN_SDHI1D0, SDHI1D0_MARK), + + /* MMC */ + PINMUX_GPIO(GPIO_FN_MMC_D7, MMC_D7_MARK), + PINMUX_GPIO(GPIO_FN_MMC_D6, MMC_D6_MARK), + PINMUX_GPIO(GPIO_FN_MMC_D5, MMC_D5_MARK), + PINMUX_GPIO(GPIO_FN_MMC_D4, MMC_D4_MARK), + PINMUX_GPIO(GPIO_FN_MMC_D3, MMC_D3_MARK), + PINMUX_GPIO(GPIO_FN_MMC_D2, MMC_D2_MARK), + PINMUX_GPIO(GPIO_FN_MMC_D1, MMC_D1_MARK), + PINMUX_GPIO(GPIO_FN_MMC_D0, MMC_D0_MARK), + PINMUX_GPIO(GPIO_FN_MMC_CLK, MMC_CLK_MARK), + PINMUX_GPIO(GPIO_FN_MMC_CMD, MMC_CMD_MARK), + + /* IrDA */ + PINMUX_GPIO(GPIO_FN_IRDA_OUT, IRDA_OUT_MARK), + PINMUX_GPIO(GPIO_FN_IRDA_IN, IRDA_IN_MARK), + + /* TSIF */ + PINMUX_GPIO(GPIO_FN_TSIF_TS0_SDAT, TSIF_TS0_SDAT_MARK), + PINMUX_GPIO(GPIO_FN_TSIF_TS0_SCK, TSIF_TS0_SCK_MARK), + PINMUX_GPIO(GPIO_FN_TSIF_TS0_SDEN, TSIF_TS0_SDEN_MARK), + PINMUX_GPIO(GPIO_FN_TSIF_TS0_SPSYNC, TSIF_TS0_SPSYNC_MARK), + + /* IRQ */ + PINMUX_GPIO(GPIO_FN_INTC_IRQ7, INTC_IRQ7_MARK), + PINMUX_GPIO(GPIO_FN_INTC_IRQ6, INTC_IRQ6_MARK), + PINMUX_GPIO(GPIO_FN_INTC_IRQ5, INTC_IRQ5_MARK), + PINMUX_GPIO(GPIO_FN_INTC_IRQ4, INTC_IRQ4_MARK), + PINMUX_GPIO(GPIO_FN_INTC_IRQ3, INTC_IRQ3_MARK), + PINMUX_GPIO(GPIO_FN_INTC_IRQ2, INTC_IRQ2_MARK), + PINMUX_GPIO(GPIO_FN_INTC_IRQ1, INTC_IRQ1_MARK), + PINMUX_GPIO(GPIO_FN_INTC_IRQ0, INTC_IRQ0_MARK), + }; + +static struct pinmux_cfg_reg pinmux_config_regs[] = { + { PINMUX_CFG_REG("PACR", 0xa4050100, 16, 2) { + PTA7_FN, PTA7_OUT, PTA7_IN_PU, PTA7_IN, + PTA6_FN, PTA6_OUT, PTA6_IN_PU, PTA6_IN, + PTA5_FN, PTA5_OUT, PTA5_IN_PU, PTA5_IN, + PTA4_FN, PTA4_OUT, PTA4_IN_PU, PTA4_IN, + PTA3_FN, PTA3_OUT, PTA3_IN_PU, PTA3_IN, + PTA2_FN, PTA2_OUT, PTA2_IN_PU, PTA2_IN, + PTA1_FN, PTA1_OUT, PTA1_IN_PU, PTA1_IN, + PTA0_FN, PTA0_OUT, PTA0_IN_PU, PTA0_IN } + }, + { PINMUX_CFG_REG("PBCR", 0xa4050102, 16, 2) { + PTB7_FN, PTB7_OUT, PTB7_IN_PU, PTB7_IN, + PTB6_FN, PTB6_OUT, PTB6_IN_PU, PTB6_IN, + PTB5_FN, PTB5_OUT, PTB5_IN_PU, PTB5_IN, + PTB4_FN, PTB4_OUT, PTB4_IN_PU, PTB4_IN, + PTB3_FN, PTB3_OUT, PTB3_IN_PU, PTB3_IN, + PTB2_FN, PTB2_OUT, PTB2_IN_PU, PTB2_IN, + PTB1_FN, PTB1_OUT, PTB1_IN_PU, PTB1_IN, + PTB0_FN, PTB0_OUT, PTB0_IN_PU, PTB0_IN } + }, + { PINMUX_CFG_REG("PCCR", 0xa4050104, 16, 2) { + PTC7_FN, PTC7_OUT, PTC7_IN_PU, PTC7_IN, + PTC6_FN, PTC6_OUT, PTC6_IN_PU, PTC6_IN, + PTC5_FN, PTC5_OUT, PTC5_IN_PU, PTC5_IN, + PTC4_FN, PTC4_OUT, PTC4_IN_PU, PTC4_IN, + PTC3_FN, PTC3_OUT, PTC3_IN_PU, PTC3_IN, + PTC2_FN, PTC2_OUT, PTC2_IN_PU, PTC2_IN, + PTC1_FN, PTC1_OUT, PTC1_IN_PU, PTC1_IN, + PTC0_FN, PTC0_OUT, PTC0_IN_PU, PTC0_IN } + }, + { PINMUX_CFG_REG("PDCR", 0xa4050106, 16, 2) { + PTD7_FN, PTD7_OUT, PTD7_IN_PU, PTD7_IN, + PTD6_FN, PTD6_OUT, PTD6_IN_PU, PTD6_IN, + PTD5_FN, PTD5_OUT, PTD5_IN_PU, PTD5_IN, + PTD4_FN, PTD4_OUT, PTD4_IN_PU, PTD4_IN, + PTD3_FN, PTD3_OUT, PTD3_IN_PU, PTD3_IN, + PTD2_FN, PTD2_OUT, PTD2_IN_PU, PTD2_IN, + PTD1_FN, PTD1_OUT, PTD1_IN_PU, PTD1_IN, + PTD0_FN, PTD0_OUT, PTD0_IN_PU, PTD0_IN } + }, + { PINMUX_CFG_REG("PECR", 0xa4050108, 16, 2) { + PTE7_FN, PTE7_OUT, PTE7_IN_PU, PTE7_IN, + PTE6_FN, PTE6_OUT, PTE6_IN_PU, PTE6_IN, + PTE5_FN, PTE5_OUT, PTE5_IN_PU, PTE5_IN, + PTE4_FN, PTE4_OUT, PTE4_IN_PU, PTE4_IN, + PTE3_FN, PTE3_OUT, PTE3_IN_PU, PTE3_IN, + PTE2_FN, PTE2_OUT, PTE2_IN_PU, PTE2_IN, + PTE1_FN, PTE1_OUT, PTE1_IN_PU, PTE1_IN, + PTE0_FN, PTE0_OUT, PTE0_IN_PU, PTE0_IN } + }, + { PINMUX_CFG_REG("PFCR", 0xa405010a, 16, 2) { + PTF7_FN, PTF7_OUT, PTF7_IN_PU, PTF7_IN, + PTF6_FN, PTF6_OUT, PTF6_IN_PU, PTF6_IN, + PTF5_FN, PTF5_OUT, PTF5_IN_PU, PTF5_IN, + PTF4_FN, PTF4_OUT, PTF4_IN_PU, PTF4_IN, + PTF3_FN, PTF3_OUT, PTF3_IN_PU, PTF3_IN, + PTF2_FN, PTF2_OUT, PTF2_IN_PU, PTF2_IN, + PTF1_FN, PTF1_OUT, PTF1_IN_PU, PTF1_IN, + PTF0_FN, PTF0_OUT, PTF0_IN_PU, PTF0_IN } + }, + { PINMUX_CFG_REG("PGCR", 0xa405010c, 16, 2) { + 0, 0, 0, 0, + 0, 0, 0, 0, + PTG5_FN, PTG5_OUT, 0, 0, + PTG4_FN, PTG4_OUT, 0, 0, + PTG3_FN, PTG3_OUT, 0, 0, + PTG2_FN, PTG2_OUT, 0, 0, + PTG1_FN, PTG1_OUT, 0, 0, + PTG0_FN, PTG0_OUT, 0, 0 } + }, + { PINMUX_CFG_REG("PHCR", 0xa405010e, 16, 2) { + PTH7_FN, PTH7_OUT, PTH7_IN_PU, PTH7_IN, + PTH6_FN, PTH6_OUT, PTH6_IN_PU, PTH6_IN, + PTH5_FN, PTH5_OUT, PTH5_IN_PU, PTH5_IN, + PTH4_FN, PTH4_OUT, PTH4_IN_PU, PTH4_IN, + PTH3_FN, PTH3_OUT, PTH3_IN_PU, PTH3_IN, + PTH2_FN, PTH2_OUT, PTH2_IN_PU, PTH2_IN, + PTH1_FN, PTH1_OUT, PTH1_IN_PU, PTH1_IN, + PTH0_FN, PTH0_OUT, PTH0_IN_PU, PTH0_IN } + }, + { PINMUX_CFG_REG("PJCR", 0xa4050110, 16, 2) { + PTJ7_FN, PTJ7_OUT, 0, 0, + PTJ6_FN, PTJ6_OUT, 0, 0, + PTJ5_FN, PTJ5_OUT, 0, 0, + 0, 0, 0, 0, + PTJ3_FN, PTJ3_OUT, PTJ3_IN_PU, PTJ3_IN, + PTJ2_FN, PTJ2_OUT, PTJ2_IN_PU, PTJ2_IN, + PTJ1_FN, PTJ1_OUT, PTJ1_IN_PU, PTJ1_IN, + PTJ0_FN, PTJ0_OUT, PTJ0_IN_PU, PTJ0_IN } + }, + { PINMUX_CFG_REG("PKCR", 0xa4050112, 16, 2) { + PTK7_FN, PTK7_OUT, PTK7_IN_PU, PTK7_IN, + PTK6_FN, PTK6_OUT, PTK6_IN_PU, PTK6_IN, + PTK5_FN, PTK5_OUT, PTK5_IN_PU, PTK5_IN, + PTK4_FN, PTK4_OUT, PTK4_IN_PU, PTK4_IN, + PTK3_FN, PTK3_OUT, PTK3_IN_PU, PTK3_IN, + PTK2_FN, PTK2_OUT, PTK2_IN_PU, PTK2_IN, + PTK1_FN, PTK1_OUT, PTK1_IN_PU, PTK1_IN, + PTK0_FN, PTK0_OUT, PTK0_IN_PU, PTK0_IN } + }, + { PINMUX_CFG_REG("PLCR", 0xa4050114, 16, 2) { + PTL7_FN, PTL7_OUT, PTL7_IN_PU, PTL7_IN, + PTL6_FN, PTL6_OUT, PTL6_IN_PU, PTL6_IN, + PTL5_FN, PTL5_OUT, PTL5_IN_PU, PTL5_IN, + PTL4_FN, PTL4_OUT, PTL4_IN_PU, PTL4_IN, + PTL3_FN, PTL3_OUT, PTL3_IN_PU, PTL3_IN, + PTL2_FN, PTL2_OUT, PTL2_IN_PU, PTL2_IN, + PTL1_FN, PTL1_OUT, PTL1_IN_PU, PTL1_IN, + PTL0_FN, PTL0_OUT, PTL0_IN_PU, PTL0_IN } + }, + { PINMUX_CFG_REG("PMCR", 0xa4050116, 16, 2) { + PTM7_FN, PTM7_OUT, PTM7_IN_PU, PTM7_IN, + PTM6_FN, PTM6_OUT, PTM6_IN_PU, PTM6_IN, + PTM5_FN, PTM5_OUT, PTM5_IN_PU, PTM5_IN, + PTM4_FN, PTM4_OUT, PTM4_IN_PU, PTM4_IN, + PTM3_FN, PTM3_OUT, PTM3_IN_PU, PTM3_IN, + PTM2_FN, PTM2_OUT, PTM2_IN_PU, PTM2_IN, + PTM1_FN, PTM1_OUT, PTM1_IN_PU, PTM1_IN, + PTM0_FN, PTM0_OUT, PTM0_IN_PU, PTM0_IN } + }, + { PINMUX_CFG_REG("PNCR", 0xa4050118, 16, 2) { + PTN7_FN, PTN7_OUT, PTN7_IN_PU, PTN7_IN, + PTN6_FN, PTN6_OUT, PTN6_IN_PU, PTN6_IN, + PTN5_FN, PTN5_OUT, PTN5_IN_PU, PTN5_IN, + PTN4_FN, PTN4_OUT, PTN4_IN_PU, PTN4_IN, + PTN3_FN, PTN3_OUT, PTN3_IN_PU, PTN3_IN, + PTN2_FN, PTN2_OUT, PTN2_IN_PU, PTN2_IN, + PTN1_FN, PTN1_OUT, PTN1_IN_PU, PTN1_IN, + PTN0_FN, PTN0_OUT, PTN0_IN_PU, PTN0_IN } + }, + { PINMUX_CFG_REG("PQCR", 0xa405011a, 16, 2) { + PTQ7_FN, PTQ7_OUT, PTQ7_IN_PU, PTQ7_IN, + PTQ6_FN, PTQ6_OUT, PTQ6_IN_PU, PTQ6_IN, + PTQ5_FN, PTQ5_OUT, PTQ5_IN_PU, PTQ5_IN, + PTQ4_FN, PTQ4_OUT, PTQ4_IN_PU, PTQ4_IN, + PTQ3_FN, PTQ3_OUT, PTQ3_IN_PU, PTQ3_IN, + PTQ2_FN, PTQ2_OUT, PTQ2_IN_PU, PTQ2_IN, + PTQ1_FN, PTQ1_OUT, PTQ1_IN_PU, PTQ1_IN, + PTQ0_FN, PTQ0_OUT, PTQ0_IN_PU, PTQ0_IN } + }, + { PINMUX_CFG_REG("PRCR", 0xa405011c, 16, 2) { + PTR7_FN, PTR7_OUT, PTR7_IN_PU, PTR7_IN, + PTR6_FN, PTR6_OUT, PTR6_IN_PU, PTR6_IN, + PTR5_FN, PTR5_OUT, PTR5_IN_PU, PTR5_IN, + PTR4_FN, PTR4_OUT, PTR4_IN_PU, PTR4_IN, + PTR3_FN, 0, PTR3_IN_PU, PTR3_IN, + PTR2_FN, 0, PTR2_IN_PU, PTR2_IN, + PTR1_FN, PTR1_OUT, PTR1_IN_PU, PTR1_IN, + PTR0_FN, PTR0_OUT, PTR0_IN_PU, PTR0_IN } + }, + { PINMUX_CFG_REG("PSCR", 0xa405011e, 16, 2) { + 0, 0, 0, 0, + PTS6_FN, PTS6_OUT, PTS6_IN_PU, PTS6_IN, + PTS5_FN, PTS5_OUT, PTS5_IN_PU, PTS5_IN, + PTS4_FN, PTS4_OUT, PTS4_IN_PU, PTS4_IN, + PTS3_FN, PTS3_OUT, PTS3_IN_PU, PTS3_IN, + PTS2_FN, PTS2_OUT, PTS2_IN_PU, PTS2_IN, + PTS1_FN, PTS1_OUT, PTS1_IN_PU, PTS1_IN, + PTS0_FN, PTS0_OUT, PTS0_IN_PU, PTS0_IN } + }, + { PINMUX_CFG_REG("PTCR", 0xa4050140, 16, 2) { + PTT7_FN, PTT7_OUT, PTT7_IN_PU, PTT7_IN, + PTT6_FN, PTT6_OUT, PTT6_IN_PU, PTT6_IN, + PTT5_FN, PTT5_OUT, PTT5_IN_PU, PTT5_IN, + PTT4_FN, PTT4_OUT, PTT4_IN_PU, PTT4_IN, + PTT3_FN, PTT3_OUT, PTT3_IN_PU, PTT3_IN, + PTT2_FN, PTT2_OUT, PTT2_IN_PU, PTT2_IN, + PTT1_FN, PTT1_OUT, PTT1_IN_PU, PTT1_IN, + PTT0_FN, PTT0_OUT, PTT0_IN_PU, PTT0_IN } + }, + { PINMUX_CFG_REG("PUCR", 0xa4050142, 16, 2) { + PTU7_FN, PTU7_OUT, PTU7_IN_PU, PTU7_IN, + PTU6_FN, PTU6_OUT, PTU6_IN_PU, PTU6_IN, + PTU5_FN, PTU5_OUT, PTU5_IN_PU, PTU5_IN, + PTU4_FN, PTU4_OUT, PTU4_IN_PU, PTU4_IN, + PTU3_FN, PTU3_OUT, PTU3_IN_PU, PTU3_IN, + PTU2_FN, PTU2_OUT, PTU2_IN_PU, PTU2_IN, + PTU1_FN, PTU1_OUT, PTU1_IN_PU, PTU1_IN, + PTU0_FN, PTU0_OUT, PTU0_IN_PU, PTU0_IN } + }, + { PINMUX_CFG_REG("PVCR", 0xa4050144, 16, 2) { + PTV7_FN, PTV7_OUT, PTV7_IN_PU, PTV7_IN, + PTV6_FN, PTV6_OUT, PTV6_IN_PU, PTV6_IN, + PTV5_FN, PTV5_OUT, PTV5_IN_PU, PTV5_IN, + PTV4_FN, PTV4_OUT, PTV4_IN_PU, PTV4_IN, + PTV3_FN, PTV3_OUT, PTV3_IN_PU, PTV3_IN, + PTV2_FN, PTV2_OUT, PTV2_IN_PU, PTV2_IN, + PTV1_FN, PTV1_OUT, PTV1_IN_PU, PTV1_IN, + PTV0_FN, PTV0_OUT, PTV0_IN_PU, PTV0_IN } + }, + { PINMUX_CFG_REG("PWCR", 0xa4050146, 16, 2) { + PTW7_FN, PTW7_OUT, PTW7_IN_PU, PTW7_IN, + PTW6_FN, PTW6_OUT, PTW6_IN_PU, PTW6_IN, + PTW5_FN, PTW5_OUT, PTW5_IN_PU, PTW5_IN, + PTW4_FN, PTW4_OUT, PTW4_IN_PU, PTW4_IN, + PTW3_FN, PTW3_OUT, PTW3_IN_PU, PTW3_IN, + PTW2_FN, PTW2_OUT, PTW2_IN_PU, PTW2_IN, + PTW1_FN, PTW1_OUT, PTW1_IN_PU, PTW1_IN, + PTW0_FN, PTW0_OUT, PTW0_IN_PU, PTW0_IN } + }, + { PINMUX_CFG_REG("PXCR", 0xa4050148, 16, 2) { + PTX7_FN, PTX7_OUT, PTX7_IN_PU, PTX7_IN, + PTX6_FN, PTX6_OUT, PTX6_IN_PU, PTX6_IN, + PTX5_FN, PTX5_OUT, PTX5_IN_PU, PTX5_IN, + PTX4_FN, PTX4_OUT, PTX4_IN_PU, PTX4_IN, + PTX3_FN, PTX3_OUT, PTX3_IN_PU, PTX3_IN, + PTX2_FN, PTX2_OUT, PTX2_IN_PU, PTX2_IN, + PTX1_FN, PTX1_OUT, PTX1_IN_PU, PTX1_IN, + PTX0_FN, PTX0_OUT, PTX0_IN_PU, PTX0_IN } + }, + { PINMUX_CFG_REG("PYCR", 0xa405014a, 16, 2) { + PTY7_FN, PTY7_OUT, PTY7_IN_PU, PTY7_IN, + PTY6_FN, PTY6_OUT, PTY6_IN_PU, PTY6_IN, + PTY5_FN, PTY5_OUT, PTY5_IN_PU, PTY5_IN, + PTY4_FN, PTY4_OUT, PTY4_IN_PU, PTY4_IN, + PTY3_FN, PTY3_OUT, PTY3_IN_PU, PTY3_IN, + PTY2_FN, PTY2_OUT, PTY2_IN_PU, PTY2_IN, + PTY1_FN, PTY1_OUT, PTY1_IN_PU, PTY1_IN, + PTY0_FN, PTY0_OUT, PTY0_IN_PU, PTY0_IN } + }, + { PINMUX_CFG_REG("PZCR", 0xa405014c, 16, 2) { + PTZ7_FN, PTZ7_OUT, PTZ7_IN_PU, PTZ7_IN, + PTZ6_FN, PTZ6_OUT, PTZ6_IN_PU, PTZ6_IN, + PTZ5_FN, PTZ5_OUT, PTZ5_IN_PU, PTZ5_IN, + PTZ4_FN, PTZ4_OUT, PTZ4_IN_PU, PTZ4_IN, + PTZ3_FN, PTZ3_OUT, PTZ3_IN_PU, PTZ3_IN, + PTZ2_FN, PTZ2_OUT, PTZ2_IN_PU, PTZ2_IN, + PTZ1_FN, PTZ1_OUT, PTZ1_IN_PU, PTZ1_IN, + PTZ0_FN, PTZ0_OUT, PTZ0_IN_PU, PTZ0_IN } + }, + { PINMUX_CFG_REG("PSELA", 0xa405014e, 16, 1) { + PSA15_0, PSA15_1, + PSA14_0, PSA14_1, + PSA13_0, PSA13_1, + PSA12_0, PSA12_1, + 0, 0, + PSA10_0, PSA10_1, + PSA9_0, PSA9_1, + PSA8_0, PSA8_1, + PSA7_0, PSA7_1, + PSA6_0, PSA6_1, + PSA5_0, PSA5_1, + 0, 0, + PSA3_0, PSA3_1, + PSA2_0, PSA2_1, + PSA1_0, PSA1_1, + PSA0_0, PSA0_1} + }, + { PINMUX_CFG_REG("PSELB", 0xa4050150, 16, 1) { + 0, 0, + PSB14_0, PSB14_1, + PSB13_0, PSB13_1, + PSB12_0, PSB12_1, + PSB11_0, PSB11_1, + PSB10_0, PSB10_1, + PSB9_0, PSB9_1, + PSB8_0, PSB8_1, + PSB7_0, PSB7_1, + PSB6_0, PSB6_1, + PSB5_0, PSB5_1, + PSB4_0, PSB4_1, + PSB3_0, PSB3_1, + PSB2_0, PSB2_1, + PSB1_0, PSB1_1, + PSB0_0, PSB0_1} + }, + { PINMUX_CFG_REG("PSELC", 0xa4050152, 16, 1) { + PSC15_0, PSC15_1, + PSC14_0, PSC14_1, + PSC13_0, PSC13_1, + PSC12_0, PSC12_1, + PSC11_0, PSC11_1, + PSC10_0, PSC10_1, + PSC9_0, PSC9_1, + PSC8_0, PSC8_1, + PSC7_0, PSC7_1, + PSC6_0, PSC6_1, + PSC5_0, PSC5_1, + PSC4_0, PSC4_1, + 0, 0, + PSC2_0, PSC2_1, + PSC1_0, PSC1_1, + PSC0_0, PSC0_1} + }, + { PINMUX_CFG_REG("PSELD", 0xa4050154, 16, 1) { + PSD15_0, PSD15_1, + PSD14_0, PSD14_1, + PSD13_0, PSD13_1, + PSD12_0, PSD12_1, + PSD11_0, PSD11_1, + PSD10_0, PSD10_1, + PSD9_0, PSD9_1, + PSD8_0, PSD8_1, + PSD7_0, PSD7_1, + PSD6_0, PSD6_1, + PSD5_0, PSD5_1, + PSD4_0, PSD4_1, + PSD3_0, PSD3_1, + PSD2_0, PSD2_1, + PSD1_0, PSD1_1, + PSD0_0, PSD0_1} + }, + { PINMUX_CFG_REG("PSELE", 0xa4050156, 16, 1) { + PSE15_0, PSE15_1, + PSE14_0, PSE14_1, + PSE13_0, PSE13_1, + PSE12_0, PSE12_1, + PSE11_0, PSE11_1, + PSE10_0, PSE10_1, + PSE9_0, PSE9_1, + PSE8_0, PSE8_1, + PSE7_0, PSE7_1, + PSE6_0, PSE6_1, + PSE5_0, PSE5_1, + PSE4_0, PSE4_1, + PSE3_0, PSE3_1, + PSE2_0, PSE2_1, + PSE1_0, PSE1_1, + PSE0_0, PSE0_1} + }, + {} +}; + +static struct pinmux_data_reg pinmux_data_regs[] = { + { PINMUX_DATA_REG("PADR", 0xa4050120, 8) { + PTA7_DATA, PTA6_DATA, PTA5_DATA, PTA4_DATA, + PTA3_DATA, PTA2_DATA, PTA1_DATA, PTA0_DATA } + }, + { PINMUX_DATA_REG("PBDR", 0xa4050122, 8) { + PTB7_DATA, PTB6_DATA, PTB5_DATA, PTB4_DATA, + PTB3_DATA, PTB2_DATA, PTB1_DATA, PTB0_DATA } + }, + { PINMUX_DATA_REG("PCDR", 0xa4050124, 8) { + PTC7_DATA, PTC6_DATA, PTC5_DATA, PTC4_DATA, + PTC3_DATA, PTC2_DATA, PTC1_DATA, PTC0_DATA } + }, + { PINMUX_DATA_REG("PDDR", 0xa4050126, 8) { + PTD7_DATA, PTD6_DATA, PTD5_DATA, PTD4_DATA, + PTD3_DATA, PTD2_DATA, PTD1_DATA, PTD0_DATA } + }, + { PINMUX_DATA_REG("PEDR", 0xa4050128, 8) { + PTE7_DATA, PTE6_DATA, PTE5_DATA, PTE4_DATA, + PTE3_DATA, PTE2_DATA, PTE1_DATA, PTE0_DATA } + }, + { PINMUX_DATA_REG("PFDR", 0xa405012a, 8) { + PTF7_DATA, PTF6_DATA, PTF5_DATA, PTF4_DATA, + PTF3_DATA, PTF2_DATA, PTF1_DATA, PTF0_DATA } + }, + { PINMUX_DATA_REG("PGDR", 0xa405012c, 8) { + 0, 0, PTG5_DATA, PTG4_DATA, + PTG3_DATA, PTG2_DATA, PTG1_DATA, PTG0_DATA } + }, + { PINMUX_DATA_REG("PHDR", 0xa405012e, 8) { + PTH7_DATA, PTH6_DATA, PTH5_DATA, PTH4_DATA, + PTH3_DATA, PTH2_DATA, PTH1_DATA, PTH0_DATA } + }, + { PINMUX_DATA_REG("PJDR", 0xa4050130, 8) { + PTJ7_DATA, PTJ6_DATA, PTJ5_DATA, 0, + PTJ3_DATA, PTJ2_DATA, PTJ1_DATA, PTJ0_DATA } + }, + { PINMUX_DATA_REG("PKDR", 0xa4050132, 8) { + PTK7_DATA, PTK6_DATA, PTK5_DATA, PTK4_DATA, + PTK3_DATA, PTK2_DATA, PTK1_DATA, PTK0_DATA } + }, + { PINMUX_DATA_REG("PLDR", 0xa4050134, 8) { + PTL7_DATA, PTL6_DATA, PTL5_DATA, PTL4_DATA, + PTL3_DATA, PTL2_DATA, PTL1_DATA, PTL0_DATA } + }, + { PINMUX_DATA_REG("PMDR", 0xa4050136, 8) { + PTM7_DATA, PTM6_DATA, PTM5_DATA, PTM4_DATA, + PTM3_DATA, PTM2_DATA, PTM1_DATA, PTM0_DATA } + }, + { PINMUX_DATA_REG("PNDR", 0xa4050138, 8) { + PTN7_DATA, PTN6_DATA, PTN5_DATA, PTN4_DATA, + PTN3_DATA, PTN2_DATA, PTN1_DATA, PTN0_DATA } + }, + { PINMUX_DATA_REG("PQDR", 0xa405013a, 8) { + PTQ7_DATA, PTQ6_DATA, PTQ5_DATA, PTQ4_DATA, + PTQ3_DATA, PTQ2_DATA, PTQ1_DATA, PTQ0_DATA } + }, + { PINMUX_DATA_REG("PRDR", 0xa405013c, 8) { + PTR7_DATA, PTR6_DATA, PTR5_DATA, PTR4_DATA, + PTR3_DATA, PTR2_DATA, PTR1_DATA, PTR0_DATA } + }, + { PINMUX_DATA_REG("PSDR", 0xa405013e, 8) { + 0, PTS6_DATA, PTS5_DATA, PTS4_DATA, + PTS3_DATA, PTS2_DATA, PTS1_DATA, PTS0_DATA } + }, + { PINMUX_DATA_REG("PTDR", 0xa4050160, 8) { + PTT7_DATA, PTT6_DATA, PTT5_DATA, PTT4_DATA, + PTT3_DATA, PTT2_DATA, PTT1_DATA, PTT0_DATA } + }, + { PINMUX_DATA_REG("PUDR", 0xa4050162, 8) { + PTU7_DATA, PTU6_DATA, PTU5_DATA, PTU4_DATA, + PTU3_DATA, PTU2_DATA, PTU1_DATA, PTU0_DATA } + }, + { PINMUX_DATA_REG("PVDR", 0xa4050164, 8) { + PTV7_DATA, PTV6_DATA, PTV5_DATA, PTV4_DATA, + PTV3_DATA, PTV2_DATA, PTV1_DATA, PTV0_DATA } + }, + { PINMUX_DATA_REG("PWDR", 0xa4050166, 8) { + PTW7_DATA, PTW6_DATA, PTW5_DATA, PTW4_DATA, + PTW3_DATA, PTW2_DATA, PTW1_DATA, PTW0_DATA } + }, + { PINMUX_DATA_REG("PXDR", 0xa4050168, 8) { + PTX7_DATA, PTX6_DATA, PTX5_DATA, PTX4_DATA, + PTX3_DATA, PTX2_DATA, PTX1_DATA, PTX0_DATA } + }, + { PINMUX_DATA_REG("PYDR", 0xa405016a, 8) { + PTY7_DATA, PTY6_DATA, PTY5_DATA, PTY4_DATA, + PTY3_DATA, PTY2_DATA, PTY1_DATA, PTY0_DATA } + }, + { PINMUX_DATA_REG("PZDR", 0xa405016c, 8) { + PTZ7_DATA, PTZ6_DATA, PTZ5_DATA, PTZ4_DATA, + PTZ3_DATA, PTZ2_DATA, PTZ1_DATA, PTZ0_DATA } + }, + { }, +}; + +static struct pinmux_info sh7724_pinmux_info = { + .name = "sh7724_pfc", + .reserved_id = PINMUX_RESERVED, + .data = { PINMUX_DATA_BEGIN, PINMUX_DATA_END }, + .input = { PINMUX_INPUT_BEGIN, PINMUX_INPUT_END }, + .input_pu = { PINMUX_INPUT_PULLUP_BEGIN, PINMUX_INPUT_PULLUP_END }, + .output = { PINMUX_OUTPUT_BEGIN, PINMUX_OUTPUT_END }, + .mark = { PINMUX_MARK_BEGIN, PINMUX_MARK_END }, + .function = { PINMUX_FUNCTION_BEGIN, PINMUX_FUNCTION_END }, + + .first_gpio = GPIO_PTA7, + .last_gpio = GPIO_FN_INTC_IRQ0, + + .gpios = pinmux_gpios, + .cfg_regs = pinmux_config_regs, + .data_regs = pinmux_data_regs, + + .gpio_data = pinmux_data, + .gpio_data_size = ARRAY_SIZE(pinmux_data), +}; + +static int __init plat_pinmux_setup(void) +{ + return register_pinmux(&sh7724_pinmux_info); +} +arch_initcall(plat_pinmux_setup); diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c new file mode 100644 index 0000000..4327b1e --- /dev/null +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -0,0 +1,371 @@ +/* + * SH7724 Setup + * + * Copyright (C) 2009 Renesas Solutions Corp. + * + * Kuninori Morimoto + * + * Based on SH7723 Setup + * Copyright (C) 2008 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Serial */ +static struct plat_sci_port sci_platform_data[] = { + { + .mapbase = 0xffe00000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 80, 80, 80, 80 }, + }, { + .mapbase = 0xffe10000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 81, 81, 81, 81 }, + }, { + .mapbase = 0xffe20000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIF, + .irqs = { 82, 82, 82, 82 }, + }, { + .mapbase = 0xa4e30000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIFA, + .irqs = { 56, 56, 56, 56 }, + }, { + .mapbase = 0xa4e40000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIFA, + .irqs = { 88, 88, 88, 88 }, + }, { + .mapbase = 0xa4e50000, + .flags = UPF_BOOT_AUTOCONF, + .type = PORT_SCIFA, + .irqs = { 109, 109, 109, 109 }, + }, { + .flags = 0, + } +}; + +static struct platform_device sci_device = { + .name = "sh-sci", + .id = -1, + .dev = { + .platform_data = sci_platform_data, + }, +}; + +/* RTC */ +static struct resource rtc_resources[] = { + [0] = { + .start = 0xa465fec0, + .end = 0xa465fec0 + 0x58 - 1, + .flags = IORESOURCE_IO, + }, + [1] = { + /* Period IRQ */ + .start = 69, + .flags = IORESOURCE_IRQ, + }, + [2] = { + /* Carry IRQ */ + .start = 70, + .flags = IORESOURCE_IRQ, + }, + [3] = { + /* Alarm IRQ */ + .start = 68, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device rtc_device = { + .name = "sh-rtc", + .id = -1, + .num_resources = ARRAY_SIZE(rtc_resources), + .resource = rtc_resources, +}; + +static struct platform_device *sh7724_devices[] __initdata = { + &sci_device, + &rtc_device, +}; + +static int __init sh7724_devices_setup(void) +{ + clk_always_enable("rtc0"); /* RTC */ + + return platform_add_devices(sh7724_devices, + ARRAY_SIZE(sh7724_devices)); +} +device_initcall(sh7724_devices_setup); + +enum { + UNUSED = 0, + + /* interrupt sources */ + IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7, + HUDI, + DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3, + _2DG_TRI, _2DG_INI, _2DG_CEI, _2DG_BRK, + DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3, + VIO_CEU20I, VIO_BEU20I, VIO_VEU3F1, VIO_VOUI, + SCIFA_SCIFA0, + VPU_VPUI, + TPU_TPUI, + CEU21I, + BEU21I, + USB_USI0, + ATAPI, + RTC_ATI, RTC_PRI, RTC_CUI, + DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR, + DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR, + KEYSC_KEYI, + SCIF_SCIF0, SCIF_SCIF1, SCIF_SCIF2, + VEU3F0I, + MSIOF_MSIOFI0, MSIOF_MSIOFI1, + SPU_SPUI0, SPU_SPUI1, + SCIFA_SCIFA1, +/* ICB_ICBI, */ + ETHI, + I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI, + I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI, + SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2, + CMT_CMTI, + TSIF_TSIFI, +/* ICB_LMBI, */ + FSI_FSI, + SCIFA_SCIFA2, + TMU0_TUNI0, TMU0_TUNI1, TMU0_TUNI2, + IRDA_IRDAI, + SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2, + JPU_JPUI, + MMC_MMCI0, MMC_MMCI1, MMC_MMCI2, + LCDC_LCDCI, + TMU1_TUNI0, TMU1_TUNI1, TMU1_TUNI2, + + /* interrupt groups */ + DMAC1A, _2DG, DMAC0A, VIO, RTC, + DMAC1B, DMAC0B, I2C0, I2C1, SDHI0, SDHI1, SPU, MMC, +}; + +static struct intc_vect vectors[] __initdata = { + INTC_VECT(IRQ0, 0x600), INTC_VECT(IRQ1, 0x620), + INTC_VECT(IRQ2, 0x640), INTC_VECT(IRQ3, 0x660), + INTC_VECT(IRQ4, 0x680), INTC_VECT(IRQ5, 0x6a0), + INTC_VECT(IRQ6, 0x6c0), INTC_VECT(IRQ7, 0x6e0), + + INTC_VECT(DMAC1A_DEI0, 0x700), + INTC_VECT(DMAC1A_DEI1, 0x720), + INTC_VECT(DMAC1A_DEI2, 0x740), + INTC_VECT(DMAC1A_DEI3, 0x760), + + INTC_VECT(_2DG_TRI, 0x780), + INTC_VECT(_2DG_INI, 0x7A0), + INTC_VECT(_2DG_CEI, 0x7C0), + INTC_VECT(_2DG_BRK, 0x7E0), + + INTC_VECT(DMAC0A_DEI0, 0x800), + INTC_VECT(DMAC0A_DEI1, 0x820), + INTC_VECT(DMAC0A_DEI2, 0x840), + INTC_VECT(DMAC0A_DEI3, 0x860), + + INTC_VECT(VIO_CEU20I, 0x880), + INTC_VECT(VIO_BEU20I, 0x8A0), + INTC_VECT(VIO_VEU3F1, 0x8C0), + INTC_VECT(VIO_VOUI, 0x8E0), + + INTC_VECT(SCIFA_SCIFA0, 0x900), + INTC_VECT(VPU_VPUI, 0x980), + INTC_VECT(TPU_TPUI, 0x9A0), + INTC_VECT(CEU21I, 0x9E0), + INTC_VECT(BEU21I, 0xA00), + INTC_VECT(USB_USI0, 0xA20), + INTC_VECT(ATAPI, 0xA60), + + INTC_VECT(RTC_ATI, 0xA80), + INTC_VECT(RTC_PRI, 0xAA0), + INTC_VECT(RTC_CUI, 0xAC0), + + INTC_VECT(DMAC1B_DEI4, 0xB00), + INTC_VECT(DMAC1B_DEI5, 0xB20), + INTC_VECT(DMAC1B_DADERR, 0xB40), + + INTC_VECT(DMAC0B_DEI4, 0xB80), + INTC_VECT(DMAC0B_DEI5, 0xBA0), + INTC_VECT(DMAC0B_DADERR, 0xBC0), + + INTC_VECT(KEYSC_KEYI, 0xBE0), + INTC_VECT(SCIF_SCIF0, 0xC00), + INTC_VECT(SCIF_SCIF1, 0xC20), + INTC_VECT(SCIF_SCIF2, 0xC40), + INTC_VECT(VEU3F0I, 0xC60), + INTC_VECT(MSIOF_MSIOFI0, 0xC80), + INTC_VECT(MSIOF_MSIOFI1, 0xCA0), + INTC_VECT(SPU_SPUI0, 0xCC0), + INTC_VECT(SPU_SPUI1, 0xCE0), + INTC_VECT(SCIFA_SCIFA1, 0xD00), + +/* INTC_VECT(ICB_ICBI, 0xD20), */ + INTC_VECT(ETHI, 0xD60), + + INTC_VECT(I2C1_ALI, 0xD80), + INTC_VECT(I2C1_TACKI, 0xDA0), + INTC_VECT(I2C1_WAITI, 0xDC0), + INTC_VECT(I2C1_DTEI, 0xDE0), + + INTC_VECT(I2C0_ALI, 0xE00), + INTC_VECT(I2C0_TACKI, 0xE20), + INTC_VECT(I2C0_WAITI, 0xE40), + INTC_VECT(I2C0_DTEI, 0xE60), + + INTC_VECT(SDHI0_SDHII0, 0xE80), + INTC_VECT(SDHI0_SDHII1, 0xEA0), + INTC_VECT(SDHI0_SDHII2, 0xEC0), + + INTC_VECT(CMT_CMTI, 0xF00), + INTC_VECT(TSIF_TSIFI, 0xF20), +/* INTC_VECT(ICB_LMBI, 0xF60), */ + INTC_VECT(FSI_FSI, 0xF80), + INTC_VECT(SCIFA_SCIFA2, 0xFA0), + + INTC_VECT(TMU0_TUNI0, 0x400), + INTC_VECT(TMU0_TUNI1, 0x420), + INTC_VECT(TMU0_TUNI2, 0x440), + + INTC_VECT(IRDA_IRDAI, 0x480), + + INTC_VECT(SDHI1_SDHII0, 0x4E0), + INTC_VECT(SDHI1_SDHII1, 0x500), + INTC_VECT(SDHI1_SDHII2, 0x520), + + INTC_VECT(JPU_JPUI, 0x560), + + INTC_VECT(MMC_MMCI0, 0x580), + INTC_VECT(MMC_MMCI1, 0x5A0), + INTC_VECT(MMC_MMCI2, 0x5C0), + + INTC_VECT(LCDC_LCDCI, 0xF40), + + INTC_VECT(TMU1_TUNI0, 0x920), + INTC_VECT(TMU1_TUNI1, 0x940), + INTC_VECT(TMU1_TUNI2, 0x960), +}; + +static struct intc_group groups[] __initdata = { + INTC_GROUP(DMAC1A, DMAC1A_DEI0, DMAC1A_DEI1, DMAC1A_DEI2, DMAC1A_DEI3), + INTC_GROUP(_2DG, _2DG_TRI, _2DG_INI, _2DG_CEI, _2DG_BRK), + INTC_GROUP(DMAC0A, DMAC0A_DEI0, DMAC0A_DEI1, DMAC0A_DEI2, DMAC0A_DEI3), + INTC_GROUP(VIO, VIO_CEU20I, VIO_BEU20I, VIO_VEU3F1, VIO_VOUI), + INTC_GROUP(RTC, RTC_ATI, RTC_PRI, RTC_CUI), + INTC_GROUP(DMAC1B, DMAC1B_DEI4, DMAC1B_DEI5, DMAC1B_DADERR), + INTC_GROUP(DMAC0B, DMAC0B_DEI4, DMAC0B_DEI5, DMAC0B_DADERR), + INTC_GROUP(I2C0, I2C0_ALI, I2C0_TACKI, I2C0_WAITI, I2C0_DTEI), + INTC_GROUP(I2C1, I2C1_ALI, I2C1_TACKI, I2C1_WAITI, I2C1_DTEI), + INTC_GROUP(SDHI0, SDHI0_SDHII0, SDHI0_SDHII1, SDHI0_SDHII2), + INTC_GROUP(SDHI1, SDHI1_SDHII0, SDHI1_SDHII1, SDHI1_SDHII2), + INTC_GROUP(SPU, SPU_SPUI0, SPU_SPUI1), + INTC_GROUP(MMC, MMC_MMCI0, MMC_MMCI1, MMC_MMCI2), +}; + +/* FIXMEEEEEEEEEEEEEEEEEEE !!!!! */ +/* very bad manual !! */ +static struct intc_mask_reg mask_registers[] __initdata = { + { 0xa4080080, 0xa40800c0, 8, /* IMR0 / IMCR0 */ + { 0, TMU1_TUNI2, TMU1_TUNI1, TMU1_TUNI0, + /*SDHII3?*/0, SDHI1_SDHII2, SDHI1_SDHII1, SDHI1_SDHII0 } }, + { 0xa4080084, 0xa40800c4, 8, /* IMR1 / IMCR1 */ + { VIO_VOUI, VIO_VEU3F1, VIO_BEU20I, VIO_CEU20I, + DMAC0A_DEI3, DMAC0A_DEI2, DMAC0A_DEI1, DMAC0A_DEI0 } }, + { 0xa4080088, 0xa40800c8, 8, /* IMR2 / IMCR2 */ + { 0, 0, 0, VPU_VPUI, ATAPI, ETHI, 0, /*SCIFA3*/SCIFA_SCIFA0 } }, + { 0xa408008c, 0xa40800cc, 8, /* IMR3 / IMCR3 */ + { DMAC1A_DEI3, DMAC1A_DEI2, DMAC1A_DEI1, DMAC1A_DEI0, + SPU_SPUI1, SPU_SPUI0, BEU21I, IRDA_IRDAI } }, + { 0xa4080090, 0xa40800d0, 8, /* IMR4 / IMCR4 */ + { 0, TMU0_TUNI2, TMU0_TUNI1, TMU0_TUNI0, + JPU_JPUI, 0, 0, LCDC_LCDCI } }, + { 0xa4080094, 0xa40800d4, 8, /* IMR5 / IMCR5 */ + { KEYSC_KEYI, DMAC0B_DADERR, DMAC0B_DEI5, DMAC0B_DEI4, + VEU3F0I, SCIF_SCIF2, SCIF_SCIF1, SCIF_SCIF0 } }, + { 0xa4080098, 0xa40800d8, 8, /* IMR6 / IMCR6 */ + { 0, 0, /*ICB_ICBI*/0, /*SCIFA4*/SCIFA_SCIFA1, + CEU21I, 0, MSIOF_MSIOFI1, MSIOF_MSIOFI0 } }, + { 0xa408009c, 0xa40800dc, 8, /* IMR7 / IMCR7 */ + { I2C0_DTEI, I2C0_WAITI, I2C0_TACKI, I2C0_ALI, + I2C1_DTEI, I2C1_WAITI, I2C1_TACKI, I2C1_ALI } }, + { 0xa40800a0, 0xa40800e0, 8, /* IMR8 / IMCR8 */ + { /*SDHII3*/0, SDHI0_SDHII2, SDHI0_SDHII1, SDHI0_SDHII0, + 0, 0, /*SCIFA5*/SCIFA_SCIFA2, FSI_FSI } }, + { 0xa40800a4, 0xa40800e4, 8, /* IMR9 / IMCR9 */ + { 0, 0, 0, CMT_CMTI, 0, /*USB1*/0, USB_USI0, 0 } }, + { 0xa40800a8, 0xa40800e8, 8, /* IMR10 / IMCR10 */ + { 0, DMAC1B_DADERR, DMAC1B_DEI5, DMAC1B_DEI4, + 0, RTC_ATI, RTC_PRI, RTC_CUI } }, + { 0xa40800ac, 0xa40800ec, 8, /* IMR11 / IMCR11 */ + { _2DG_BRK, _2DG_CEI, _2DG_INI, _2DG_TRI, + 0, TPU_TPUI, /*ICB_LMBI*/0, TSIF_TSIFI } }, + { 0xa40800b0, 0xa40800f0, 8, /* IMR12 / IMCR12 */ + { 0, 0, 0, 0, 0, 0, 0, 0/*2DDMAC*/ } }, + { 0xa4140044, 0xa4140064, 8, /* INTMSK00 / INTMSKCLR00 */ + { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } }, +}; + +static struct intc_prio_reg prio_registers[] __initdata = { + { 0xa4080000, 0, 16, 4, /* IPRA */ { TMU0_TUNI0, TMU0_TUNI1, + TMU0_TUNI2, IRDA_IRDAI } }, + { 0xa4080004, 0, 16, 4, /* IPRB */ { JPU_JPUI, LCDC_LCDCI, + DMAC1A, BEU21I } }, + { 0xa4080008, 0, 16, 4, /* IPRC */ { TMU1_TUNI0, TMU1_TUNI1, + TMU1_TUNI2, SPU } }, + { 0xa408000c, 0, 16, 4, /* IPRD */ { 0, MMC, 0, ATAPI } }, + { 0xa4080010, 0, 16, 4, /* IPRE */ + { DMAC0A, /*BEU?VEU?*/VIO, /*SCIFA3*/SCIFA_SCIFA0, /*VPU5F*/ + VPU_VPUI } }, + { 0xa4080014, 0, 16, 4, /* IPRF */ { KEYSC_KEYI, DMAC0B, + USB_USI0, CMT_CMTI } }, + { 0xa4080018, 0, 16, 4, /* IPRG */ { SCIF_SCIF0, SCIF_SCIF1, + SCIF_SCIF2, VEU3F0I } }, + { 0xa408001c, 0, 16, 4, /* IPRH */ { MSIOF_MSIOFI0, MSIOF_MSIOFI1, + I2C1, I2C0 } }, + { 0xa4080020, 0, 16, 4, /* IPRI */ { /*SCIFA4*/SCIFA_SCIFA1, /*ICB*/0, + TSIF_TSIFI, _2DG/*ICB?*/ } }, + { 0xa4080024, 0, 16, 4, /* IPRJ */ { CEU21I, ETHI, FSI_FSI, SDHI1 } }, + { 0xa4080028, 0, 16, 4, /* IPRK */ { RTC, DMAC1B, /*ICB?*/0, SDHI0 } }, + { 0xa408002c, 0, 16, 4, /* IPRL */ { /*SCIFA5*/SCIFA_SCIFA2, 0, + TPU_TPUI, /*2DDMAC*/0 } }, + { 0xa4140010, 0, 32, 4, /* INTPRI00 */ + { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } }, +}; + +static struct intc_sense_reg sense_registers[] __initdata = { + { 0xa414001c, 16, 2, /* ICR1 */ + { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } }, +}; + +static struct intc_mask_reg ack_registers[] __initdata = { + { 0xa4140024, 0, 8, /* INTREQ00 */ + { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } }, +}; + +static DECLARE_INTC_DESC_ACK(intc_desc, "sh7724", vectors, groups, + mask_registers, prio_registers, sense_registers, + ack_registers); + +void __init plat_irq_setup(void) +{ + register_intc_controller(&intc_desc); +} diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 04a6004..0e6ed80 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -435,7 +435,8 @@ static const char *cpu_name[] = { [CPU_SH7722] = "SH7722", [CPU_SHX3] = "SH-X3", [CPU_SH5_101] = "SH5-101", [CPU_SH5_103] = "SH5-103", [CPU_MXG] = "MX-G", [CPU_SH7723] = "SH7723", - [CPU_SH7366] = "SH7366", [CPU_SH_NONE] = "Unknown" + [CPU_SH7366] = "SH7366", [CPU_SH7724] = "SH7724", + [CPU_SH_NONE] = "Unknown" }; const char *get_cpu_subtype(struct sh_cpuinfo *c) diff --git a/arch/sh/oprofile/common.c b/arch/sh/oprofile/common.c index 1b9d430..44f4e31 100644 --- a/arch/sh/oprofile/common.c +++ b/arch/sh/oprofile/common.c @@ -109,6 +109,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case CPU_SH7785: case CPU_SH7786: case CPU_SH7723: + case CPU_SH7724: case CPU_SHX3: lmodel = &op_model_sh4a_ops; break; -- cgit v0.10.2 From 47948d2bd6d27648a107a27357b3bc5ad054ff64 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 15 Apr 2009 11:42:47 +0900 Subject: serial: sh-sci: SH7724 support. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt diff --git a/drivers/serial/sh-sci.h b/drivers/serial/sh-sci.h index d0aa82d..84cc651 100644 --- a/drivers/serial/sh-sci.h +++ b/drivers/serial/sh-sci.h @@ -91,6 +91,9 @@ # define SCSPTR5 0xa4050128 # define SCIF_ORER 0x0001 /* overrun error bit */ # define SCSCR_INIT(port) 0x0038 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */ +#elif defined(CONFIG_CPU_SUBTYPE_SH7724) +# define SCIF_ORER 0x0001 /* overrun error bit */ +# define SCSCR_INIT(port) 0x0038 /* TIE=0,RIE=0,TE=1,RE=1,REIE=1 */ #elif defined(CONFIG_CPU_SUBTYPE_SH4_202) # define SCSPTR2 0xffe80020 /* 16 bit SCIF */ # define SCIF_ORER 0x0001 /* overrun error bit */ @@ -361,7 +364,8 @@ h8_sci_offset, h8_sci_size) \ CPU_SCI_FNS(name, h8_sci_offset, h8_sci_size) #define SCIF_FNS(name, sh3_scif_offset, sh3_scif_size, sh4_scif_offset, sh4_scif_size) -#elif defined(CONFIG_CPU_SUBTYPE_SH7723) +#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\ + defined(CONFIG_CPU_SUBTYPE_SH7724) #define SCIx_FNS(name, sh4_scifa_offset, sh4_scifa_size, sh4_scif_offset, sh4_scif_size) \ CPU_SCIx_FNS(name, sh4_scifa_offset, sh4_scifa_size, sh4_scif_offset, sh4_scif_size) #define SCIF_FNS(name, sh4_scif_offset, sh4_scif_size) \ @@ -390,7 +394,8 @@ SCIF_FNS(SCFDR, 0x1c, 16) SCIF_FNS(SCxTDR, 0x20, 8) SCIF_FNS(SCxRDR, 0x24, 8) SCIF_FNS(SCLSR, 0x24, 16) -#elif defined(CONFIG_CPU_SUBTYPE_SH7723) +#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\ + defined(CONFIG_CPU_SUBTYPE_SH7724) SCIx_FNS(SCSMR, 0x00, 16, 0x00, 16) SCIx_FNS(SCBRR, 0x04, 8, 0x04, 8) SCIx_FNS(SCSCR, 0x08, 16, 0x08, 16) @@ -604,6 +609,17 @@ static inline int sci_rxd_in(struct uart_port *port) return ctrl_inb(SCSPTR5) & 0x0008 ? 1 : 0; /* SCIF5 */ return 1; } +#elif defined(CONFIG_CPU_SUBTYPE_SH7724) +# define SCFSR 0x0010 +# define SCASSR 0x0014 +static inline int sci_rxd_in(struct uart_port *port) +{ + if (port->type == PORT_SCIF) + return ctrl_inw((port->mapbase + SCFSR)) & SCIF_BRK ? 1 : 0; + if (port->type == PORT_SCIFA) + return ctrl_inw((port->mapbase + SCASSR)) & SCIF_BRK ? 1 : 0; + return 1; +} #elif defined(CONFIG_CPU_SUBTYPE_SH5_101) || defined(CONFIG_CPU_SUBTYPE_SH5_103) static inline int sci_rxd_in(struct uart_port *port) { @@ -757,7 +773,8 @@ static inline int sci_rxd_in(struct uart_port *port) defined(CONFIG_CPU_SUBTYPE_SH7720) || \ defined(CONFIG_CPU_SUBTYPE_SH7721) #define SCBRR_VALUE(bps, clk) (((clk*2)+16*bps)/(32*bps)-1) -#elif defined(CONFIG_CPU_SUBTYPE_SH7723) +#elif defined(CONFIG_CPU_SUBTYPE_SH7723) ||\ + defined(CONFIG_CPU_SUBTYPE_SH7724) static inline int scbrr_calc(struct uart_port *port, int bps, int clk) { if (port->type == PORT_SCIF) -- cgit v0.10.2 From 40c7e8be556715079d0a9d7454ceb5371a2f0b39 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 16 Apr 2009 13:16:07 +0900 Subject: sh: sh7724: Add I2C support. This adds support for the SH-Mobile I2C controller on the SH7724. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 4327b1e..f17eda6 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -99,9 +99,55 @@ static struct platform_device rtc_device = { .resource = rtc_resources, }; +/* I2C0 */ +static struct resource iic0_resources[] = { + [0] = { + .name = "IIC0", + .start = 0x04470000, + .end = 0x04470018 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 96, + .end = 99, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device iic0_device = { + .name = "i2c-sh_mobile", + .id = 0, /* "i2c0" clock */ + .num_resources = ARRAY_SIZE(iic0_resources), + .resource = iic0_resources, +}; + +/* I2C1 */ +static struct resource iic1_resources[] = { + [0] = { + .name = "IIC1", + .start = 0x04750000, + .end = 0x04750018 - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 92, + .end = 95, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device iic1_device = { + .name = "i2c-sh_mobile", + .id = 1, /* "i2c1" clock */ + .num_resources = ARRAY_SIZE(iic1_resources), + .resource = iic1_resources, +}; + static struct platform_device *sh7724_devices[] __initdata = { &sci_device, &rtc_device, + &iic0_device, + &iic1_device, }; static int __init sh7724_devices_setup(void) -- cgit v0.10.2 From cd5b9ef776feff440e7a889d1a565ceabfecbfa1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 15 Apr 2009 11:43:03 +0900 Subject: sh: sh7724: Add VPU support. This adds uio_pdrv_genirq support for the VPU. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index f17eda6..499a6fc 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -143,16 +143,49 @@ static struct platform_device iic1_device = { .resource = iic1_resources, }; +/* VPU */ +static struct uio_info vpu_platform_data = { + .name = "VPU5F", + .version = "0", + .irq = 60, +}; + +static struct resource vpu_resources[] = { + [0] = { + .name = "VPU", + .start = 0xfe900000, + .end = 0xfe902807, + .flags = IORESOURCE_MEM, + }, + [1] = { + /* place holder for contiguous memory */ + }, +}; + +static struct platform_device vpu_device = { + .name = "uio_pdrv_genirq", + .id = 0, + .dev = { + .platform_data = &vpu_platform_data, + }, + .resource = vpu_resources, + .num_resources = ARRAY_SIZE(vpu_resources), +}; + static struct platform_device *sh7724_devices[] __initdata = { &sci_device, &rtc_device, &iic0_device, &iic1_device, + &vpu_device, }; static int __init sh7724_devices_setup(void) { clk_always_enable("rtc0"); /* RTC */ + clk_always_enable("vpu0"); /* VPU */ + + platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20); return platform_add_devices(sh7724_devices, ARRAY_SIZE(sh7724_devices)); -- cgit v0.10.2 From ad95b78c9f735da11ff9ec760e9b038cd82aead6 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 15 Apr 2009 11:43:07 +0900 Subject: sh: sh7724: Add VEU support. This adds uio_pdrv_genirq support for the VEU. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 499a6fc..65570ed 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -172,20 +172,84 @@ static struct platform_device vpu_device = { .num_resources = ARRAY_SIZE(vpu_resources), }; +/* VEU0 */ +static struct uio_info veu0_platform_data = { + .name = "VEU3F0", + .version = "0", + .irq = 83, +}; + +static struct resource veu0_resources[] = { + [0] = { + .name = "VEU3F0", + .start = 0xfe920000, + .end = 0xfe9200cb - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + /* place holder for contiguous memory */ + }, +}; + +static struct platform_device veu0_device = { + .name = "uio_pdrv_genirq", + .id = 1, + .dev = { + .platform_data = &veu0_platform_data, + }, + .resource = veu0_resources, + .num_resources = ARRAY_SIZE(veu0_resources), +}; + +/* VEU1 */ +static struct uio_info veu1_platform_data = { + .name = "VEU3F1", + .version = "0", + .irq = 54, +}; + +static struct resource veu1_resources[] = { + [0] = { + .name = "VEU3F1", + .start = 0xfe924000, + .end = 0xfe9240cb - 1, + .flags = IORESOURCE_MEM, + }, + [1] = { + /* place holder for contiguous memory */ + }, +}; + +static struct platform_device veu1_device = { + .name = "uio_pdrv_genirq", + .id = 2, + .dev = { + .platform_data = &veu1_platform_data, + }, + .resource = veu1_resources, + .num_resources = ARRAY_SIZE(veu1_resources), +}; + static struct platform_device *sh7724_devices[] __initdata = { &sci_device, &rtc_device, &iic0_device, &iic1_device, &vpu_device, + &veu0_device, + &veu1_device, }; static int __init sh7724_devices_setup(void) { clk_always_enable("rtc0"); /* RTC */ clk_always_enable("vpu0"); /* VPU */ + clk_always_enable("veu1"); /* VEU3F1 */ + clk_always_enable("veu0"); /* VEU3F0 */ platform_resource_setup_memory(&vpu_device, "vpu", 2 << 20); + platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20); + platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20); return platform_add_devices(sh7724_devices, ARRAY_SIZE(sh7724_devices)); -- cgit v0.10.2 From 6a3395beb99d7ae882ddf701c6fa6005ad7edebf Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 16 Apr 2009 15:36:13 +0900 Subject: sh: sh7724: Add CMT clockevents support. This enables support for the CMT clockevents driver on SH7724. Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 65570ed..8b87ba8 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -230,7 +230,40 @@ static struct platform_device veu1_device = { .num_resources = ARRAY_SIZE(veu1_resources), }; +static struct sh_cmt_config cmt_platform_data = { + .name = "CMT", + .channel_offset = 0x60, + .timer_bit = 5, + .clk = "cmt0", + .clockevent_rating = 125, + .clocksource_rating = 200, +}; + +static struct resource cmt_resources[] = { + [0] = { + .name = "CMT", + .start = 0x044a0060, + .end = 0x044a006b, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 104, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device cmt_device = { + .name = "sh_cmt", + .id = 0, + .dev = { + .platform_data = &cmt_platform_data, + }, + .resource = cmt_resources, + .num_resources = ARRAY_SIZE(cmt_resources), +}; + static struct platform_device *sh7724_devices[] __initdata = { + &cmt_device, &sci_device, &rtc_device, &iic0_device, -- cgit v0.10.2 From 59fe700dcbf3d6257ae86ca8c0192fc64b2eea1c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 16 Apr 2009 15:43:42 +0900 Subject: sh: Have SH7724 select ARCH_SHMOBILE. This is an SH-Mobile CPU, so select ARCH_SHMOBILE. This enables all of the PM functionality, amongst other things. Signed-off-by: Paul Mundt diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 505d1ac..4c68fde 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -351,6 +351,7 @@ config CPU_SUBTYPE_SH7724 bool "Support SH7724 processor" select CPU_SH4A select CPU_SHX2 + select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_CMT help -- cgit v0.10.2 From bc9f3d4291f8275924a9197a81208a5df0a15095 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 16 Apr 2009 15:44:58 +0900 Subject: sh: Add a generic defconfig for SH7724 platforms. Signed-off-by: Paul Mundt diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig new file mode 100644 index 0000000..268d04e --- /dev/null +++ b/arch/sh/configs/sh7724_generic_defconfig @@ -0,0 +1,707 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.30-rc2 +# Thu Apr 16 15:42:20 2009 +# +CONFIG_SUPERH=y +CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set +CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" +CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_GENERIC_IRQ_PROBE=y +# CONFIG_GENERIC_GPIO is not set +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ARCH_HIBERNATION_POSSIBLE=y +CONFIG_SYS_SUPPORTS_CMT=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_ARCH_NO_VIRT_TO_BUS=y +CONFIG_ARCH_HAS_DEFAULT_IDLE=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +# CONFIG_BSD_PROCESS_ACCT is not set + +# +# RCU Subsystem +# +# CONFIG_CLASSIC_RCU is not set +CONFIG_TREE_RCU=y +# CONFIG_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_USER_SCHED=y +# CONFIG_CGROUP_SCHED is not set +CONFIG_CGROUPS=y +# CONFIG_CGROUP_DEBUG is not set +# CONFIG_CGROUP_NS is not set +# CONFIG_CGROUP_FREEZER is not set +# CONFIG_CGROUP_DEVICE is not set +# CONFIG_CPUSETS is not set +# CONFIG_CGROUP_CPUACCT is not set +# CONFIG_RESOURCE_COUNTERS is not set +# CONFIG_RELAY is not set +# CONFIG_NAMESPACES is not set +# CONFIG_BLK_DEV_INITRD is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_EMBEDDED=y +# CONFIG_UID16 is not set +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_AIO=y +CONFIG_VM_EVENT_COUNTERS=y +# CONFIG_COMPAT_BRK is not set +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_TRACEPOINTS=y +# CONFIG_MARKERS is not set +CONFIG_OPROFILE=y +CONFIG_HAVE_OPROFILE=y +CONFIG_HAVE_IOREMAP_PROT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +# CONFIG_MODULES is not set +CONFIG_BLOCK=y +# CONFIG_LBD is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +CONFIG_FREEZER=y + +# +# System type +# +CONFIG_CPU_SH4=y +CONFIG_CPU_SH4A=y +CONFIG_CPU_SHX2=y +CONFIG_ARCH_SHMOBILE=y +# CONFIG_CPU_SUBTYPE_SH7619 is not set +# CONFIG_CPU_SUBTYPE_SH7201 is not set +# CONFIG_CPU_SUBTYPE_SH7203 is not set +# CONFIG_CPU_SUBTYPE_SH7206 is not set +# CONFIG_CPU_SUBTYPE_SH7263 is not set +# CONFIG_CPU_SUBTYPE_MXG is not set +# CONFIG_CPU_SUBTYPE_SH7705 is not set +# CONFIG_CPU_SUBTYPE_SH7706 is not set +# CONFIG_CPU_SUBTYPE_SH7707 is not set +# CONFIG_CPU_SUBTYPE_SH7708 is not set +# CONFIG_CPU_SUBTYPE_SH7709 is not set +# CONFIG_CPU_SUBTYPE_SH7710 is not set +# CONFIG_CPU_SUBTYPE_SH7712 is not set +# CONFIG_CPU_SUBTYPE_SH7720 is not set +# CONFIG_CPU_SUBTYPE_SH7721 is not set +# CONFIG_CPU_SUBTYPE_SH7750 is not set +# CONFIG_CPU_SUBTYPE_SH7091 is not set +# CONFIG_CPU_SUBTYPE_SH7750R is not set +# CONFIG_CPU_SUBTYPE_SH7750S is not set +# CONFIG_CPU_SUBTYPE_SH7751 is not set +# CONFIG_CPU_SUBTYPE_SH7751R is not set +# CONFIG_CPU_SUBTYPE_SH7760 is not set +# CONFIG_CPU_SUBTYPE_SH4_202 is not set +# CONFIG_CPU_SUBTYPE_SH7723 is not set +CONFIG_CPU_SUBTYPE_SH7724=y +# CONFIG_CPU_SUBTYPE_SH7763 is not set +# CONFIG_CPU_SUBTYPE_SH7770 is not set +# CONFIG_CPU_SUBTYPE_SH7780 is not set +# CONFIG_CPU_SUBTYPE_SH7785 is not set +# CONFIG_CPU_SUBTYPE_SH7786 is not set +# CONFIG_CPU_SUBTYPE_SHX3 is not set +# CONFIG_CPU_SUBTYPE_SH7343 is not set +# CONFIG_CPU_SUBTYPE_SH7722 is not set +# CONFIG_CPU_SUBTYPE_SH7366 is not set + +# +# Memory management options +# +CONFIG_QUICKLIST=y +CONFIG_MMU=y +CONFIG_PAGE_OFFSET=0x80000000 +CONFIG_MEMORY_START=0x08000000 +CONFIG_MEMORY_SIZE=0x04000000 +CONFIG_29BIT=y +# CONFIG_X2TLB is not set +CONFIG_VSYSCALL=y +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +CONFIG_MAX_ACTIVE_REGIONS=1 +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_ARCH_SELECT_MEMORY_MODEL=y +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y +CONFIG_PAGE_SIZE_4KB=y +# CONFIG_PAGE_SIZE_8KB is not set +# CONFIG_PAGE_SIZE_16KB is not set +# CONFIG_PAGE_SIZE_64KB is not set +CONFIG_ENTRY_OFFSET=0x00001000 +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_STATIC=y + +# +# Memory hotplug is currently incompatible with Software Suspend +# +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_NR_QUICK=2 +CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y + +# +# Cache configuration +# +CONFIG_CACHE_WRITEBACK=y +# CONFIG_CACHE_WRITETHROUGH is not set +# CONFIG_CACHE_OFF is not set + +# +# Processor features +# +CONFIG_CPU_LITTLE_ENDIAN=y +# CONFIG_CPU_BIG_ENDIAN is not set +CONFIG_SH_FPU=y +# CONFIG_SH_STORE_QUEUES is not set +CONFIG_CPU_HAS_INTEVT=y +CONFIG_CPU_HAS_SR_RB=y +CONFIG_CPU_HAS_PTEA=y +CONFIG_CPU_HAS_FPU=y + +# +# Board support +# + +# +# Timer and clock configuration +# +CONFIG_SH_TMU=y +CONFIG_SH_TIMER_CMT=y +CONFIG_SH_TIMER_IRQ=16 +CONFIG_SH_PCLK_FREQ=41666666 +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_TABLE=y +# CONFIG_CPU_FREQ_DEBUG is not set +CONFIG_CPU_FREQ_STAT=y +# CONFIG_CPU_FREQ_STAT_DETAILS is not set +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set +# CONFIG_CPU_FREQ_GOV_USERSPACE is not set +# CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set +CONFIG_SH_CPU_FREQ=y + +# +# DMA support +# +# CONFIG_SH_DMA is not set + +# +# Companion Chips +# + +# +# Additional SuperH Device Drivers +# +# CONFIG_HEARTBEAT is not set +# CONFIG_PUSH_SWITCH is not set + +# +# Kernel features +# +# CONFIG_HZ_100 is not set +CONFIG_HZ_250=y +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=250 +CONFIG_SCHED_HRTICK=y +CONFIG_KEXEC=y +# CONFIG_CRASH_DUMP is not set +CONFIG_KEXEC_JUMP=y +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_GUSA=y + +# +# Boot options +# +CONFIG_ZERO_PAGE_OFFSET=0x00001000 +CONFIG_BOOT_LINK_OFFSET=0x00800000 +# CONFIG_CMDLINE_BOOL is not set + +# +# Bus options +# +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_HAVE_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options (EXPERIMENTAL) +# +CONFIG_PM=y +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +CONFIG_HIBERNATION=y +CONFIG_PM_STD_PARTITION="" +CONFIG_CPU_IDLE=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPU_IDLE_GOV_MENU=y +# CONFIG_NET is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +# CONFIG_PREVENT_FIRMWARE_BUILD is not set +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_RAM is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_BLK_DEV_HD is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +# CONFIG_DEVKMEM is not set +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_SH_SCI=y +CONFIG_SERIAL_SH_SCI_NR_UARTS=6 +CONFIG_SERIAL_SH_SCI_CONSOLE=y +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_UNIX98_PTYS is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_I2C=y +CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_HELPER_AUTO=y + +# +# I2C Hardware Bus support +# + +# +# I2C system bus drivers (mostly embedded / system-on-chip) +# +# CONFIG_I2C_OCORES is not set +CONFIG_I2C_SH_MOBILE=y +# CONFIG_I2C_SIMTEC is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_PCA_PLATFORM is not set + +# +# Miscellaneous I2C Chip support +# +# CONFIG_DS1682 is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_PCF8575 is not set +# CONFIG_SENSORS_PCA9539 is not set +# CONFIG_SENSORS_MAX6875 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set +# CONFIG_SPI is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_REGULATOR is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_SOUND is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_RTC_LIB=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_HCTOSYS_DEVICE="rtc0" +# CONFIG_RTC_DEBUG is not set + +# +# RTC interfaces +# +CONFIG_RTC_INTF_DEV=y +# CONFIG_RTC_INTF_DEV_UIE_EMUL is not set +# CONFIG_RTC_DRV_TEST is not set + +# +# I2C RTC drivers +# +# CONFIG_RTC_DRV_DS1307 is not set +# CONFIG_RTC_DRV_DS1374 is not set +# CONFIG_RTC_DRV_DS1672 is not set +# CONFIG_RTC_DRV_MAX6900 is not set +# CONFIG_RTC_DRV_RS5C372 is not set +# CONFIG_RTC_DRV_ISL1208 is not set +# CONFIG_RTC_DRV_X1205 is not set +# CONFIG_RTC_DRV_PCF8563 is not set +# CONFIG_RTC_DRV_PCF8583 is not set +# CONFIG_RTC_DRV_M41T80 is not set +# CONFIG_RTC_DRV_S35390A is not set +# CONFIG_RTC_DRV_FM3130 is not set +# CONFIG_RTC_DRV_RX8581 is not set + +# +# SPI RTC drivers +# + +# +# Platform RTC drivers +# +# CONFIG_RTC_DRV_DS1286 is not set +# CONFIG_RTC_DRV_DS1511 is not set +# CONFIG_RTC_DRV_DS1553 is not set +# CONFIG_RTC_DRV_DS1742 is not set +# CONFIG_RTC_DRV_STK17TA8 is not set +# CONFIG_RTC_DRV_M48T86 is not set +# CONFIG_RTC_DRV_M48T35 is not set +# CONFIG_RTC_DRV_M48T59 is not set +# CONFIG_RTC_DRV_BQ4802 is not set +# CONFIG_RTC_DRV_V3020 is not set + +# +# on-CPU RTC drivers +# +CONFIG_RTC_DRV_SH=y +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +CONFIG_UIO=y +# CONFIG_UIO_PDRV is not set +CONFIG_UIO_PDRV_GENIRQ=y +# CONFIG_UIO_SMX is not set +# CONFIG_UIO_SERCOS3 is not set +# CONFIG_STAGING is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +CONFIG_FILE_LOCKING=y +# CONFIG_XFS_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +# CONFIG_PROC_FS is not set +# CONFIG_SYSFS is not set +# CONFIG_TMPFS is not set +# CONFIG_HUGETLBFS is not set +# CONFIG_HUGETLB_PAGE is not set +# CONFIG_MISC_FILESYSTEMS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_NLS is not set + +# +# Kernel hacking +# +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +# CONFIG_PRINTK_TIME is not set +# CONFIG_ENABLE_WARN_DEPRECATED is not set +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +# CONFIG_DEBUG_KERNEL is not set +CONFIG_STACKTRACE=y +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_LATENCYTOP is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y + +# +# Tracers +# +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_SH_STANDARD_BIOS is not set +# CONFIG_EARLY_SCIF_CONSOLE is not set +# CONFIG_MORE_COMPILE_OPTIONS is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +# CONFIG_CRYPTO is not set +CONFIG_BINARY_PRINTF=y + +# +# Library routines +# +CONFIG_GENERIC_FIND_LAST_BIT=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +# CONFIG_CRC32 is not set +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y -- cgit v0.10.2 From 4e01f54bfd3f423db8fd6c91c4f0471f18aa0c50 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 16 Apr 2009 08:53:34 +0200 Subject: ALSA: hda - Add Creative CA0110-IBG support Added the support for Creative SB X-Fi boards with UAA (HD-audio) mode. In the HD-audio mode, no multiple streams are supported by just it behaves like a normal HD-audio device. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index eb2a19b..c710150 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -139,6 +139,19 @@ config SND_HDA_CODEC_CONEXANT snd-hda-codec-conexant. This module is automatically loaded at probing. +config SND_HDA_CODEC_CA0110 + bool "Build Creative CA0110-IBG codec support" + depends on SND_HDA_INTEL + default y + help + Say Y here to include Creative CA0110-IBG codec support in + snd-hda-intel driver, found on some Creative X-Fi cards. + + When the HD-audio driver is built as a module, the codec + support code is also built as another module, + snd-hda-codec-ca0110. + This module is automatically loaded at probing. + config SND_HDA_CODEC_CMEDIA bool "Build C-Media HD-audio codec support" default y diff --git a/sound/pci/hda/Makefile b/sound/pci/hda/Makefile index 50f9d09..e3081d4 100644 --- a/sound/pci/hda/Makefile +++ b/sound/pci/hda/Makefile @@ -13,6 +13,7 @@ snd-hda-codec-analog-objs := patch_analog.o snd-hda-codec-idt-objs := patch_sigmatel.o snd-hda-codec-si3054-objs := patch_si3054.o snd-hda-codec-atihdmi-objs := patch_atihdmi.o +snd-hda-codec-ca0110-objs := patch_ca0110.o snd-hda-codec-conexant-objs := patch_conexant.o snd-hda-codec-via-objs := patch_via.o snd-hda-codec-nvhdmi-objs := patch_nvhdmi.o @@ -40,6 +41,9 @@ endif ifdef CONFIG_SND_HDA_CODEC_ATIHDMI obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-atihdmi.o endif +ifdef CONFIG_SND_HDA_CODEC_CA0110 +obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-ca0110.o +endif ifdef CONFIG_SND_HDA_CODEC_CONEXANT obj-$(CONFIG_SND_HDA_INTEL) += snd-hda-codec-conexant.o endif diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index fd6e6f3..37f24ce 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -48,6 +48,7 @@ static struct hda_vendor_id hda_vendor_ids[] = { { 0x1095, "Silicon Image" }, { 0x10de, "Nvidia" }, { 0x10ec, "Realtek" }, + { 0x1102, "Creative" }, { 0x1106, "VIA" }, { 0x111d, "IDT" }, { 0x11c1, "LSI" }, diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 21e99cf..21a3092 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2513,6 +2513,11 @@ static struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x10de, 0x0d97), .driver_data = AZX_DRIVER_NVIDIA }, /* Teradici */ { PCI_DEVICE(0x6549, 0x1200), .driver_data = AZX_DRIVER_TERA }, + /* Creative X-Fi (CA0110-IBG) */ + { PCI_DEVICE(PCI_VENDOR_ID_CREATIVE, PCI_ANY_ID), + .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8, + .class_mask = 0xffffff, + .driver_data = AZX_DRIVER_GENERIC }, /* AMD Generic, PCI class code and Vendor ID for HD Audio */ { PCI_DEVICE(PCI_VENDOR_ID_ATI, PCI_ANY_ID), .class = PCI_CLASS_MULTIMEDIA_HD_AUDIO << 8, diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c new file mode 100644 index 0000000..7ec41da --- /dev/null +++ b/sound/pci/hda/patch_ca0110.c @@ -0,0 +1,574 @@ +/* + * HD audio interface patch for Creative X-Fi CA0110-IBG chip + * + * Copyright (c) 2008 Takashi Iwai + * + * This driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include "hda_codec.h" +#include "hda_local.h" + +/* + */ + +struct ca0110_spec { + struct auto_pin_cfg autocfg; + struct hda_multi_out multiout; + hda_nid_t out_pins[AUTO_CFG_MAX_OUTS]; + hda_nid_t dacs[AUTO_CFG_MAX_OUTS]; + hda_nid_t hp_dac; + hda_nid_t input_pins[AUTO_PIN_LAST]; + hda_nid_t adcs[AUTO_PIN_LAST]; + hda_nid_t dig_out; + hda_nid_t dig_in; + unsigned int num_inputs; + const char *input_labels[AUTO_PIN_LAST]; + struct hda_pcm pcm_rec[2]; /* PCM information */ +}; + +/* + * PCM callbacks + */ +static int ca0110_playback_pcm_open(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + return snd_hda_multi_out_analog_open(codec, &spec->multiout, substream, + hinfo); +} + +static int ca0110_playback_pcm_prepare(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + unsigned int stream_tag, + unsigned int format, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + return snd_hda_multi_out_analog_prepare(codec, &spec->multiout, + stream_tag, format, substream); +} + +static int ca0110_playback_pcm_cleanup(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + return snd_hda_multi_out_analog_cleanup(codec, &spec->multiout); +} + +/* + * Digital out + */ +static int ca0110_dig_playback_pcm_open(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + return snd_hda_multi_out_dig_open(codec, &spec->multiout); +} + +static int ca0110_dig_playback_pcm_close(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + return snd_hda_multi_out_dig_close(codec, &spec->multiout); +} + +static int ca0110_dig_playback_pcm_prepare(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + unsigned int stream_tag, + unsigned int format, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + return snd_hda_multi_out_dig_prepare(codec, &spec->multiout, stream_tag, + format, substream); +} + +/* + * Analog capture + */ +static int ca0110_capture_pcm_prepare(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + unsigned int stream_tag, + unsigned int format, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + + snd_hda_codec_setup_stream(codec, spec->adcs[substream->number], + stream_tag, 0, format); + return 0; +} + +static int ca0110_capture_pcm_cleanup(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream) +{ + struct ca0110_spec *spec = codec->spec; + + snd_hda_codec_cleanup_stream(codec, spec->adcs[substream->number]); + return 0; +} + +/* + */ + +static char *dirstr[2] = { "Playback", "Capture" }; + +static int _add_switch(struct hda_codec *codec, hda_nid_t nid, const char *pfx, + int chan, int dir) +{ + char namestr[44]; + int type = dir ? HDA_INPUT : HDA_OUTPUT; + struct snd_kcontrol_new knew = + HDA_CODEC_MUTE_MONO(namestr, nid, chan, 0, type); + sprintf(namestr, "%s %s Switch", pfx, dirstr[dir]); + return snd_hda_ctl_add(codec, snd_ctl_new1(&knew, codec)); +} + +static int _add_volume(struct hda_codec *codec, hda_nid_t nid, const char *pfx, + int chan, int dir) +{ + char namestr[44]; + int type = dir ? HDA_INPUT : HDA_OUTPUT; + struct snd_kcontrol_new knew = + HDA_CODEC_VOLUME_MONO(namestr, nid, chan, 0, type); + sprintf(namestr, "%s %s Volume", pfx, dirstr[dir]); + return snd_hda_ctl_add(codec, snd_ctl_new1(&knew, codec)); +} + +#define add_out_switch(codec, nid, pfx) _add_switch(codec, nid, pfx, 3, 0) +#define add_out_volume(codec, nid, pfx) _add_volume(codec, nid, pfx, 3, 0) +#define add_in_switch(codec, nid, pfx) _add_switch(codec, nid, pfx, 3, 1) +#define add_in_volume(codec, nid, pfx) _add_volume(codec, nid, pfx, 3, 1) +#define add_mono_switch(codec, nid, pfx, chan) \ + _add_switch(codec, nid, pfx, chan, 0) +#define add_mono_volume(codec, nid, pfx, chan) \ + _add_volume(codec, nid, pfx, chan, 0) + +static int ca0110_build_controls(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + static char *prefix[AUTO_CFG_MAX_OUTS] = { + "Front", "Surround", NULL, "Side", "Multi" + }; + hda_nid_t mutenid; + int i, err; + + for (i = 0; i < spec->multiout.num_dacs; i++) { + if (get_wcaps(codec, spec->out_pins[i]) & AC_WCAP_OUT_AMP) + mutenid = spec->out_pins[i]; + else + mutenid = spec->multiout.dac_nids[i]; + if (!prefix[i]) { + err = add_mono_switch(codec, mutenid, + "Center", 1); + if (err < 0) + return err; + err = add_mono_switch(codec, mutenid, + "LFE", 1); + if (err < 0) + return err; + err = add_mono_volume(codec, spec->multiout.dac_nids[i], + "Center", 1); + if (err < 0) + return err; + err = add_mono_volume(codec, spec->multiout.dac_nids[i], + "LFE", 1); + if (err < 0) + return err; + } else { + err = add_out_switch(codec, mutenid, + prefix[i]); + if (err < 0) + return err; + err = add_out_volume(codec, spec->multiout.dac_nids[i], + prefix[i]); + if (err < 0) + return err; + } + } + if (cfg->hp_outs) { + if (get_wcaps(codec, cfg->hp_pins[0]) & AC_WCAP_OUT_AMP) + mutenid = cfg->hp_pins[0]; + else + mutenid = spec->multiout.dac_nids[i]; + + err = add_out_switch(codec, mutenid, "Headphone"); + if (err < 0) + return err; + if (spec->hp_dac) { + err = add_out_volume(codec, spec->hp_dac, "Headphone"); + if (err < 0) + return err; + } + } + for (i = 0; i < spec->num_inputs; i++) { + const char *label = spec->input_labels[i]; + if (get_wcaps(codec, spec->input_pins[i]) & AC_WCAP_IN_AMP) + mutenid = spec->input_pins[i]; + else + mutenid = spec->adcs[i]; + err = add_in_switch(codec, mutenid, label); + if (err < 0) + return err; + err = add_in_volume(codec, spec->adcs[i], label); + if (err < 0) + return err; + } + + if (spec->dig_out) { + err = snd_hda_create_spdif_out_ctls(codec, spec->dig_out); + if (err < 0) + return err; + err = snd_hda_create_spdif_share_sw(codec, &spec->multiout); + if (err < 0) + return err; + spec->multiout.share_spdif = 1; + } + if (spec->dig_in) { + err = snd_hda_create_spdif_in_ctls(codec, spec->dig_in); + if (err < 0) + return err; + err = add_in_volume(codec, spec->dig_in, "IEC958"); + } + return 0; +} + +/* + */ +static struct hda_pcm_stream ca0110_pcm_analog_playback = { + .substreams = 1, + .channels_min = 2, + .channels_max = 8, + .ops = { + .open = ca0110_playback_pcm_open, + .prepare = ca0110_playback_pcm_prepare, + .cleanup = ca0110_playback_pcm_cleanup + }, +}; + +static struct hda_pcm_stream ca0110_pcm_analog_capture = { + .substreams = 1, + .channels_min = 2, + .channels_max = 2, + .ops = { + .prepare = ca0110_capture_pcm_prepare, + .cleanup = ca0110_capture_pcm_cleanup + }, +}; + +static struct hda_pcm_stream ca0110_pcm_digital_playback = { + .substreams = 1, + .channels_min = 2, + .channels_max = 2, + .ops = { + .open = ca0110_dig_playback_pcm_open, + .close = ca0110_dig_playback_pcm_close, + .prepare = ca0110_dig_playback_pcm_prepare + }, +}; + +static struct hda_pcm_stream ca0110_pcm_digital_capture = { + .substreams = 1, + .channels_min = 2, + .channels_max = 2, +}; + +static int ca0110_build_pcms(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + struct hda_pcm *info = spec->pcm_rec; + + codec->pcm_info = info; + codec->num_pcms = 0; + + info->name = "CA0110 Analog"; + info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ca0110_pcm_analog_playback; + info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dacs[0]; + info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max = + spec->multiout.num_dacs * 2; + info->stream[SNDRV_PCM_STREAM_CAPTURE] = ca0110_pcm_analog_capture; + info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = spec->num_inputs; + info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[0]; + codec->num_pcms++; + + if (!spec->dig_out && !spec->dig_in) + return 0; + + info++; + info->name = "CA0110 Digital"; + info->pcm_type = HDA_PCM_TYPE_SPDIF; + if (spec->dig_out) { + info->stream[SNDRV_PCM_STREAM_PLAYBACK] = + ca0110_pcm_digital_playback; + info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dig_out; + } + if (spec->dig_in) { + info->stream[SNDRV_PCM_STREAM_CAPTURE] = + ca0110_pcm_digital_capture; + info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->dig_in; + } + codec->num_pcms++; + + return 0; +} + +static void init_output(struct hda_codec *codec, hda_nid_t pin, hda_nid_t dac) +{ + if (pin) { + snd_hda_codec_write(codec, pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP); + if (get_wcaps(codec, pin) & AC_WCAP_OUT_AMP) + snd_hda_codec_write(codec, pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, + AMP_OUT_UNMUTE); + } + if (dac) + snd_hda_codec_write(codec, dac, 0, + AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO); +} + +static void init_input(struct hda_codec *codec, hda_nid_t pin, hda_nid_t adc) +{ + if (pin) { + snd_hda_codec_write(codec, pin, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80); + if (get_wcaps(codec, pin) & AC_WCAP_IN_AMP) + snd_hda_codec_write(codec, pin, 0, + AC_VERB_SET_AMP_GAIN_MUTE, + AMP_IN_UNMUTE(0)); + } + if (adc) + snd_hda_codec_write(codec, adc, 0, AC_VERB_SET_AMP_GAIN_MUTE, + AMP_IN_UNMUTE(0)); +} + +static int ca0110_init(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + int i; + + for (i = 0; i < spec->multiout.num_dacs; i++) + init_output(codec, spec->out_pins[i], + spec->multiout.dac_nids[i]); + init_output(codec, cfg->hp_pins[0], spec->hp_dac); + init_output(codec, cfg->dig_out_pins[0], spec->dig_out); + + for (i = 0; i < spec->num_inputs; i++) + init_input(codec, spec->input_pins[i], spec->adcs[i]); + init_input(codec, cfg->dig_in_pin, spec->dig_in); + return 0; +} + +static void ca0110_free(struct hda_codec *codec) +{ + kfree(codec->spec); +} + +static struct hda_codec_ops ca0110_patch_ops = { + .build_controls = ca0110_build_controls, + .build_pcms = ca0110_build_pcms, + .init = ca0110_init, + .free = ca0110_free, +}; + + +static void parse_line_outs(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + int i, n; + unsigned int def_conf; + hda_nid_t nid; + + n = 0; + for (i = 0; i < cfg->line_outs; i++) { + nid = cfg->line_out_pins[i]; + def_conf = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_CONFIG_DEFAULT, 0); + if (!def_conf) + continue; /* invalid pin */ + if (snd_hda_get_connections(codec, nid, &spec->dacs[i], 1) != 1) + continue; + spec->out_pins[n++] = nid; + } + spec->multiout.dac_nids = spec->dacs; + spec->multiout.num_dacs = n; +} + +static void parse_hp_out(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + int i; + unsigned int def_conf; + hda_nid_t nid, dac; + + if (!cfg->hp_outs) + return; + nid = cfg->hp_pins[0]; + def_conf = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_CONFIG_DEFAULT, 0); + if (!def_conf) { + cfg->hp_outs = 0; + return; + } + if (snd_hda_get_connections(codec, nid, &dac, 1) != 1) + return; + + for (i = 0; i < cfg->line_outs; i++) + if (dac == spec->dacs[i]) + break; + if (i >= cfg->line_outs) { + spec->hp_dac = dac; + spec->multiout.hp_nid = dac; + } +} + +static void parse_input(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + hda_nid_t nid, pin; + int n, i, j; + + n = 0; + nid = codec->start_nid; + for (i = 0; i < codec->num_nodes; i++, nid++) { + unsigned int wcaps = get_wcaps(codec, nid); + unsigned int type = (wcaps & AC_WCAP_TYPE) >> + AC_WCAP_TYPE_SHIFT; + if (type != AC_WID_AUD_IN) + continue; + if (snd_hda_get_connections(codec, nid, &pin, 1) != 1) + continue; + if (pin == cfg->dig_in_pin) { + spec->dig_in = nid; + continue; + } + for (j = 0; j < AUTO_PIN_LAST; j++) + if (cfg->input_pins[j] == pin) + break; + if (j >= AUTO_PIN_LAST) + continue; + spec->input_pins[n] = pin; + spec->input_labels[n] = auto_pin_cfg_labels[j]; + spec->adcs[n] = nid; + n++; + } + spec->num_inputs = n; +} + +static void parse_digital(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + + if (cfg->dig_outs && + snd_hda_get_connections(codec, cfg->dig_out_pins[0], + &spec->dig_out, 1) == 1) + spec->multiout.dig_out_nid = cfg->dig_out_pins[0]; +} + +static int ca0110_parse_auto_config(struct hda_codec *codec) +{ + struct ca0110_spec *spec = codec->spec; + int err; + + err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL); + if (err < 0) + return err; + + parse_line_outs(codec); + parse_hp_out(codec); + parse_digital(codec); + parse_input(codec); + return 0; +} + + +int patch_ca0110(struct hda_codec *codec) +{ + struct ca0110_spec *spec; + int err; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + codec->spec = spec; + + codec->bus->needs_damn_long_delay = 1; + + err = ca0110_parse_auto_config(codec); + if (err < 0) + goto error; + + codec->patch_ops = ca0110_patch_ops; + + return 0; + + error: + kfree(codec->spec); + codec->spec = NULL; + return err; +} + + +/* + * patch entries + */ +static struct hda_codec_preset snd_hda_preset_ca0110[] = { + { .id = 0x1102000a, .name = "CA0110-IBG", .patch = patch_ca0110 }, + { .id = 0x1102000b, .name = "CA0110-IBG", .patch = patch_ca0110 }, + { .id = 0x1102000d, .name = "SB0880 X-Fi", .patch = patch_ca0110 }, + {} /* terminator */ +}; + +MODULE_ALIAS("snd-hda-codec-id:1102000a"); +MODULE_ALIAS("snd-hda-codec-id:1102000b"); +MODULE_ALIAS("snd-hda-codec-id:1102000d"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Creative CA0110-IBG HD-audio codec"); + +static struct hda_codec_preset_list ca0110_list = { + .preset = snd_hda_preset_ca0110, + .owner = THIS_MODULE, +}; + +static int __init patch_ca0110_init(void) +{ + return snd_hda_add_codec_preset(&ca0110_list); +} + +static void __exit patch_ca0110_exit(void) +{ + snd_hda_delete_codec_preset(&ca0110_list); +} + +module_init(patch_ca0110_init) +module_exit(patch_ca0110_exit) -- cgit v0.10.2 From b8b47bfbe4eb1ae0e6891e49c86a5f4fb00413be Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 11 Mar 2009 15:41:51 +0900 Subject: sh: pass along struct pci_channel These patches rework the pci code for the sh architecture. Currently each board implements some kind of ioport to address mapping. Some boards use generic_io_base others try passing addresses as io ports. This is the first set of patches that try to unify the pci code as much as possible to avoid duplicated code. This will in the end lead to fewer lines board specific code and more generic code. This patch makes sure a struct pci_channel pointer is passed along to various pci functions such as pci_read_reg(), pci_write_reg(), pci_fixup_pcic(), sh7751_pcic_init() and sh7780_pcic_init(). Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c index 1c1d412..a82011d 100644 --- a/arch/sh/drivers/pci/fixups-lboxre2.c +++ b/arch/sh/drivers/pci/fixups-lboxre2.c @@ -9,33 +9,34 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. */ +#include #include "pci-sh4.h" #define PCIMCR_MRSET_OFF 0xBFFFFFFF #define PCIMCR_RFSH_OFF 0xFFFFFFFB -int pci_fixup_pcic(void) +int pci_fixup_pcic(struct pci_channel *chan) { unsigned long bcr1, mcr; bcr1 = ctrl_inl(SH7751_BCR1); bcr1 |= 0x40080000; /* Enable Bit 19 BREQEN, set PCIC to slave */ - pci_write_reg(bcr1, SH4_PCIBCR1); + pci_write_reg(chan, bcr1, SH4_PCIBCR1); /* Enable all interrupts, so we known what to fix */ - pci_write_reg(0x0000c3ff, SH4_PCIINTM); - pci_write_reg(0x0000380f, SH4_PCIAINTM); - pci_write_reg(0xfb900047, SH7751_PCICONF1); - pci_write_reg(0xab000001, SH7751_PCICONF4); + pci_write_reg(chan, 0x0000c3ff, SH4_PCIINTM); + pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); + pci_write_reg(chan, 0xfb900047, SH7751_PCICONF1); + pci_write_reg(chan, 0xab000001, SH7751_PCICONF4); mcr = ctrl_inl(SH7751_MCR); mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF; - pci_write_reg(mcr, SH4_PCIMCR); + pci_write_reg(chan, mcr, SH4_PCIMCR); - pci_write_reg(0x0c000000, SH7751_PCICONF5); - pci_write_reg(0xd0000000, SH7751_PCICONF6); - pci_write_reg(0x0c000000, SH4_PCILAR0); - pci_write_reg(0x00000000, SH4_PCILAR1); + pci_write_reg(chan, 0x0c000000, SH7751_PCICONF5); + pci_write_reg(chan, 0xd0000000, SH7751_PCICONF6); + pci_write_reg(chan, 0x0c000000, SH4_PCILAR0); + pci_write_reg(chan, 0x00000000, SH4_PCILAR1); return 0; } diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c index 3e321df..5b25021 100644 --- a/arch/sh/drivers/pci/fixups-r7780rp.c +++ b/arch/sh/drivers/pci/fixups-r7780rp.c @@ -14,32 +14,33 @@ #include "pci-sh4.h" #include -int pci_fixup_pcic(void) +int pci_fixup_pcic(struct pci_channel *chan) { - pci_write_reg(0x000043ff, SH4_PCIINTM); - pci_write_reg(0x0000380f, SH4_PCIAINTM); + pci_write_reg(chan, 0x000043ff, SH4_PCIINTM); + pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); - pci_write_reg(0xfbb00047, SH7780_PCICMD); - pci_write_reg(0x00000000, SH7780_PCIIBAR); + pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD); + pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR); - pci_write_reg(0x00011912, SH7780_PCISVID); - pci_write_reg(0x08000000, SH7780_PCICSCR0); - pci_write_reg(0x0000001b, SH7780_PCICSAR0); - pci_write_reg(0xfd000000, SH7780_PCICSCR1); - pci_write_reg(0x0000000f, SH7780_PCICSAR1); + pci_write_reg(chan, 0x00011912, SH7780_PCISVID); + pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0); + pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0); + pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1); + pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1); - pci_write_reg(0xfd000000, SH7780_PCIMBR0); - pci_write_reg(0x00fc0000, SH7780_PCIMBMR0); + pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0); + pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0); #ifdef CONFIG_32BIT - pci_write_reg(0xc0000000, SH7780_PCIMBR2); - pci_write_reg(0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2); + pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2); + pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2); #endif /* Set IOBR for windows containing area specified in pci.h */ - pci_write_reg((PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)), + pci_write_reg(chan, (PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)), SH7780_PCIIOBR); - pci_write_reg(((SH7780_PCI_IO_SIZE-1) & (7<<18)), SH7780_PCIIOBMR); + pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)), + SH7780_PCIIOBMR); return 0; } diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c index 904bce8..3885233 100644 --- a/arch/sh/drivers/pci/fixups-rts7751r2d.c +++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c @@ -10,34 +10,35 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. */ +#include #include "pci-sh4.h" #define PCIMCR_MRSET_OFF 0xBFFFFFFF #define PCIMCR_RFSH_OFF 0xFFFFFFFB -int pci_fixup_pcic(void) +int pci_fixup_pcic(struct pci_channel *chan) { unsigned long bcr1, mcr; bcr1 = ctrl_inl(SH7751_BCR1); bcr1 |= 0x40080000; /* Enable Bit 19 BREQEN, set PCIC to slave */ - pci_write_reg(bcr1, SH4_PCIBCR1); + pci_write_reg(chan, bcr1, SH4_PCIBCR1); /* Enable all interrupts, so we known what to fix */ - pci_write_reg(0x0000c3ff, SH4_PCIINTM); - pci_write_reg(0x0000380f, SH4_PCIAINTM); + pci_write_reg(chan, 0x0000c3ff, SH4_PCIINTM); + pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); - pci_write_reg(0xfb900047, SH7751_PCICONF1); - pci_write_reg(0xab000001, SH7751_PCICONF4); + pci_write_reg(chan, 0xfb900047, SH7751_PCICONF1); + pci_write_reg(chan, 0xab000001, SH7751_PCICONF4); mcr = ctrl_inl(SH7751_MCR); mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF; - pci_write_reg(mcr, SH4_PCIMCR); + pci_write_reg(chan, mcr, SH4_PCIMCR); - pci_write_reg(0x0c000000, SH7751_PCICONF5); - pci_write_reg(0xd0000000, SH7751_PCICONF6); - pci_write_reg(0x0c000000, SH4_PCILAR0); - pci_write_reg(0x00000000, SH4_PCILAR1); + pci_write_reg(chan, 0x0c000000, SH7751_PCICONF5); + pci_write_reg(chan, 0xd0000000, SH7751_PCICONF6); + pci_write_reg(chan, 0x0c000000, SH4_PCILAR0); + pci_write_reg(chan, 0x00000000, SH4_PCILAR1); return 0; } diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c index 2f88630..3f6754a 100644 --- a/arch/sh/drivers/pci/fixups-sdk7780.c +++ b/arch/sh/drivers/pci/fixups-sdk7780.c @@ -14,46 +14,48 @@ #include "pci-sh4.h" #include -int pci_fixup_pcic(void) +int pci_fixup_pcic(struct pci_channel *chan) { ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable all interrupts, so we know what to fix */ - pci_write_reg(0x0000C3FF, SH7780_PCIIMR); - pci_write_reg(0x0000380F, SH7780_PCIAINTM); + pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR); + pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM); /* Set up standard PCI config registers */ - pci_write_reg(0xFB00, SH7780_PCISTATUS); - pci_write_reg(0x0047, SH7780_PCICMD); - pci_write_reg(0x00, SH7780_PCIPIF); - pci_write_reg(0x00, SH7780_PCISUB); - pci_write_reg(0x06, SH7780_PCIBCC); - pci_write_reg(0x1912, SH7780_PCISVID); - pci_write_reg(0x0001, SH7780_PCISID); + pci_write_reg(chan, 0xFB00, SH7780_PCISTATUS); + pci_write_reg(chan, 0x0047, SH7780_PCICMD); + pci_write_reg(chan, 0x00, SH7780_PCIPIF); + pci_write_reg(chan, 0x00, SH7780_PCISUB); + pci_write_reg(chan, 0x06, SH7780_PCIBCC); + pci_write_reg(chan, 0x1912, SH7780_PCISVID); + pci_write_reg(chan, 0x0001, SH7780_PCISID); - pci_write_reg(0x08000000, SH7780_PCIMBAR0); /* PCI */ - pci_write_reg(0x08000000, SH7780_PCILAR0); /* SHwy */ - pci_write_reg(0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */ + pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0); /* PCI */ + pci_write_reg(chan, 0x08000000, SH7780_PCILAR0); /* SHwy */ + pci_write_reg(chan, 0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */ - pci_write_reg(0x00000000, SH7780_PCIMBAR1); - pci_write_reg(0x00000000, SH7780_PCILAR1); - pci_write_reg(0x00000000, SH7780_PCILSR1); + pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1); + pci_write_reg(chan, 0x00000000, SH7780_PCILAR1); + pci_write_reg(chan, 0x00000000, SH7780_PCILSR1); - pci_write_reg(0xAB000801, SH7780_PCIIBAR); + pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR); /* * Set the MBR so PCI address is one-to-one with window, * meaning all calls go straight through... use ifdef to * catch erroneous assumption. */ - pci_write_reg(0xFD000000 , SH7780_PCIMBR0); - pci_write_reg(0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ + pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0); + pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ /* Set IOBR for window containing area specified in pci.h */ - pci_write_reg(PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR); - pci_write_reg((SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR); + pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), + SH7780_PCIIOBR); + pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18), + SH7780_PCIIOBMR); - pci_write_reg(0xA5000C01, SH7780_PCICR); + pci_write_reg(chan, 0xA5000C01, SH7780_PCICR); return 0; } diff --git a/arch/sh/drivers/pci/fixups-se7780.c b/arch/sh/drivers/pci/fixups-se7780.c index 880cea1..b8e735e 100644 --- a/arch/sh/drivers/pci/fixups-se7780.c +++ b/arch/sh/drivers/pci/fixups-se7780.c @@ -15,13 +15,13 @@ #include "pci-sh4.h" #include -int pci_fixup_pcic(void) +int pci_fixup_pcic(struct pci_channel *chan) { ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable all interrupts, so we know what to fix */ - pci_write_reg(0x0000C3FF, SH7780_PCIIMR); - pci_write_reg(0x0000380F, SH7780_PCIAINTM); + pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR); + pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM); /* Set up standard PCI config registers */ ctrl_outw(0xFB00, PCI_REG(SH7780_PCISTATUS)); @@ -32,29 +32,31 @@ int pci_fixup_pcic(void) ctrl_outw(0x1912, PCI_REG(SH7780_PCISVID)); ctrl_outw(0x0001, PCI_REG(SH7780_PCISID)); - pci_write_reg(0x08000000, SH7780_PCIMBAR0); /* PCI */ - pci_write_reg(0x08000000, SH7780_PCILAR0); /* SHwy */ - pci_write_reg(0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */ + pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0); /* PCI */ + pci_write_reg(chan, 0x08000000, SH7780_PCILAR0); /* SHwy */ + pci_write_reg(chan, 0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */ - pci_write_reg(0x00000000, SH7780_PCIMBAR1); - pci_write_reg(0x00000000, SH7780_PCILAR1); - pci_write_reg(0x00000000, SH7780_PCILSR1); + pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1); + pci_write_reg(chan, 0x00000000, SH7780_PCILAR1); + pci_write_reg(chan, 0x00000000, SH7780_PCILSR1); - pci_write_reg(0xAB000801, SH7780_PCIIBAR); + pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR); /* * Set the MBR so PCI address is one-to-one with window, * meaning all calls go straight through... use ifdef to * catch erroneous assumption. */ - pci_write_reg(0xFD000000 , SH7780_PCIMBR0); - pci_write_reg(0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ + pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0); + pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ /* Set IOBR for window containing area specified in pci.h */ - pci_write_reg(PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR); - pci_write_reg((SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR); + pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), + SH7780_PCIIOBR); + pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18), + SH7780_PCIIOBMR); - pci_write_reg(0xA5000C01, SH7780_PCICR); + pci_write_reg(chan, 0xA5000C01, SH7780_PCICR); return 0; } diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c index bff09ec..343c072 100644 --- a/arch/sh/drivers/pci/ops-landisk.c +++ b/arch/sh/drivers/pci/ops-landisk.c @@ -45,7 +45,7 @@ static struct sh4_pci_address_map sh7751_pci_map = { int __init pcibios_init_platform(void) { - return sh7751_pcic_init(&sh7751_pci_map); + return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); } int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c index 86c0b6f..8bff32a 100644 --- a/arch/sh/drivers/pci/ops-lboxre2.c +++ b/arch/sh/drivers/pci/ops-lboxre2.c @@ -59,5 +59,5 @@ static struct sh4_pci_address_map sh7751_pci_map = { int __init pcibios_init_platform(void) { - return sh7751_pcic_init(&sh7751_pci_map); + return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); } diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c index 8555238..bf32ee8 100644 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ b/arch/sh/drivers/pci/ops-r7780rp.c @@ -64,5 +64,5 @@ static struct sh4_pci_address_map sh7780_pci_map = { int __init pcibios_init_platform(void) { - return sh7780_pcic_init(&sh7780_pci_map); + return sh7780_pcic_init(&board_pci_channels[0], &sh7780_pci_map); } diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c index d6ca74b..e4208a6 100644 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ b/arch/sh/drivers/pci/ops-rts7751r2d.c @@ -69,6 +69,6 @@ static struct sh4_pci_address_map sh7751_pci_map = { int __init pcibios_init_platform(void) { __set_io_port_base(SH7751_PCI_IO_BASE); - return sh7751_pcic_init(&sh7751_pci_map); + return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); } diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c index 4dcc641..21d59d4 100644 --- a/arch/sh/drivers/pci/ops-sdk7780.c +++ b/arch/sh/drivers/pci/ops-sdk7780.c @@ -69,5 +69,5 @@ static struct sh4_pci_address_map sdk7780_pci_map = { int __init pcibios_init_platform(void) { printk(KERN_INFO "SH7780 PCI: Finished initializing PCI controller\n"); - return sh7780_pcic_init(&sdk7780_pci_map); + return sh7780_pcic_init(&board_pci_channels[0], &sdk7780_pci_map); } diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c index 3145c62..78a6f2b 100644 --- a/arch/sh/drivers/pci/ops-se7780.c +++ b/arch/sh/drivers/pci/ops-se7780.c @@ -92,5 +92,5 @@ int __init pcibios_init_platform(void) ctrl_outw(0x0013, FPGA_PCI_INTSEL1); ctrl_outw(0xE402, FPGA_PCI_INTSEL2); - return sh7780_pcic_init(&se7780_pci_map); + return sh7780_pcic_init(&board_pci_channels[0], &se7780_pci_map); } diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c index 710a3b0..92d27f7 100644 --- a/arch/sh/drivers/pci/ops-sh4.c +++ b/arch/sh/drivers/pci/ops-sh4.c @@ -34,8 +34,8 @@ static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn, * so we must do byte alignment by hand */ spin_lock_irqsave(&sh4_pci_lock, flags); - pci_write_reg(CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); - data = pci_read_reg(SH4_PCIPDR); + pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); + data = pci_read_reg(NULL, SH4_PCIPDR); spin_unlock_irqrestore(&sh4_pci_lock, flags); switch (size) { @@ -68,8 +68,8 @@ static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn, u32 data; spin_lock_irqsave(&sh4_pci_lock, flags); - pci_write_reg(CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); - data = pci_read_reg(SH4_PCIPDR); + pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); + data = pci_read_reg(NULL, SH4_PCIPDR); spin_unlock_irqrestore(&sh4_pci_lock, flags); switch (size) { @@ -90,7 +90,7 @@ static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_FUNC_NOT_SUPPORTED; } - pci_write_reg(data, SH4_PCIPDR); + pci_write_reg(NULL, data, SH4_PCIPDR); return PCIBIOS_SUCCESSFUL; } @@ -106,25 +106,25 @@ struct pci_ops sh4_pci_ops = { */ static unsigned int pci_probe = PCI_PROBE_CONF1; -int __init sh4_pci_check_direct(void) +int __init sh4_pci_check_direct(struct pci_channel *chan) { /* * Check if configuration works. */ if (pci_probe & PCI_PROBE_CONF1) { - unsigned int tmp = pci_read_reg(SH4_PCIPAR); + unsigned int tmp = pci_read_reg(chan, SH4_PCIPAR); - pci_write_reg(P1SEG, SH4_PCIPAR); + pci_write_reg(chan, P1SEG, SH4_PCIPAR); - if (pci_read_reg(SH4_PCIPAR) == P1SEG) { - pci_write_reg(tmp, SH4_PCIPAR); + if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) { + pci_write_reg(chan, tmp, SH4_PCIPAR); printk(KERN_INFO "PCI: Using configuration type 1\n"); request_region(PCI_REG(SH4_PCIPAR), 8, "PCI conf1"); return 0; } - pci_write_reg(tmp, SH4_PCIPAR); + pci_write_reg(chan, tmp, SH4_PCIPAR); } pr_debug("PCI: pci_check_direct failed\n"); @@ -163,7 +163,7 @@ char * __devinit pcibios_setup(char *str) return str; } -int __attribute__((weak)) pci_fixup_pcic(void) +int __attribute__((weak)) pci_fixup_pcic(struct pci_channel *chan) { /* Nothing to do. */ return 0; diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c index 53dd893..cba8015 100644 --- a/arch/sh/drivers/pci/ops-snapgear.c +++ b/arch/sh/drivers/pci/ops-snapgear.c @@ -66,7 +66,7 @@ static struct sh4_pci_address_map sh7751_pci_map = { */ int __init pcibios_init_platform(void) { - return sh7751_pcic_init(&sh7751_pci_map); + return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); } int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c index a8f7801..69fcc5c 100644 --- a/arch/sh/drivers/pci/ops-titan.c +++ b/arch/sh/drivers/pci/ops-titan.c @@ -73,5 +73,5 @@ static struct sh4_pci_address_map sh7751_pci_map = { int __init pcibios_init_platform(void) { - return sh7751_pcic_init(&sh7751_pci_map); + return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); } diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h index a83dcf7..62ba350 100644 --- a/arch/sh/drivers/pci/pci-sh4.h +++ b/arch/sh/drivers/pci/pci-sh4.h @@ -154,8 +154,8 @@ /* arch/sh/kernel/drivers/pci/ops-sh4.c */ extern struct pci_ops sh4_pci_ops; -int sh4_pci_check_direct(void); -int pci_fixup_pcic(void); +int sh4_pci_check_direct(struct pci_channel *chan); +int pci_fixup_pcic(struct pci_channel *chan); struct sh4_pci_address_space { unsigned long base; @@ -168,13 +168,16 @@ struct sh4_pci_address_map { unsigned long flags; }; -static inline void pci_write_reg(unsigned long val, unsigned long reg) +static inline void pci_write_reg(struct pci_channel *chan, + unsigned long val, unsigned long reg) { ctrl_outl(val, PCI_REG(reg)); } -static inline unsigned long pci_read_reg(unsigned long reg) +static inline unsigned long pci_read_reg(struct pci_channel *chan, + unsigned long reg) { return ctrl_inl(PCI_REG(reg)); } + #endif /* __PCI_SH4_H */ diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 3065eb184..9c2c014 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -40,21 +40,22 @@ static int __init sh7751_pci_init(void) pr_debug("PCI: Starting intialization.\n"); /* check for SH7751/SH7751R hardware */ - id = pci_read_reg(SH7751_PCICONF0); + id = pci_read_reg(NULL, SH7751_PCICONF0); if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) && id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) { pr_debug("PCI: This is not an SH7751(R) (%x)\n", id); return -ENODEV; } - if ((ret = sh4_pci_check_direct()) != 0) + if ((ret = sh4_pci_check_direct(NULL)) != 0) return ret; return pcibios_init_platform(); } subsys_initcall(sh7751_pci_init); -static int __init __area_sdram_check(unsigned int area) +static int __init __area_sdram_check(struct pci_channel *chan, + unsigned int area) { u32 word; @@ -65,7 +66,7 @@ static int __init __area_sdram_check(unsigned int area) area, word); return 0; } - pci_write_reg(word, SH4_PCIBCR1); + pci_write_reg(chan, word, SH4_PCIBCR1); word = (u16)ctrl_inw(SH7751_BCR2); /* check BCR2 for 32bit SDRAM interface*/ @@ -74,12 +75,13 @@ static int __init __area_sdram_check(unsigned int area) area, word); return 0; } - pci_write_reg(word, SH4_PCIBCR2); + pci_write_reg(chan, word, SH4_PCIBCR2); return 1; } -int __init sh7751_pcic_init(struct sh4_pci_address_map *map) +int __init sh7751_pcic_init(struct pci_channel *chan, + struct sh4_pci_address_map *map) { u32 reg; u32 word; @@ -90,10 +92,10 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) ctrl_outl(reg, SH7751_BCR1); /* Turn the clocks back on (not done in reset)*/ - pci_write_reg(0, SH4_PCICLKR); + pci_write_reg(chan, 0, SH4_PCICLKR); /* Clear Powerdown IRQ's (not done in reset) */ word = SH4_PCIPINT_D3 | SH4_PCIPINT_D0; - pci_write_reg(word, SH4_PCIPINT); + pci_write_reg(chan, word, SH4_PCIPINT); /* * This code is unused for some boards as it is done in the @@ -103,11 +105,11 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) if (!(map->flags & SH4_PCIC_NO_RESET)) { /* toggle PCI reset pin */ word = SH4_PCICR_PREFIX | SH4_PCICR_PRST; - pci_write_reg(word, SH4_PCICR); + pci_write_reg(chan, word, SH4_PCICR); /* Wait for a long time... not 1 sec. but long enough */ mdelay(100); word = SH4_PCICR_PREFIX; - pci_write_reg(word, SH4_PCICR); + pci_write_reg(chan, word, SH4_PCICR); } /* set the command/status bits to: @@ -116,11 +118,11 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) */ word = SH7751_PCICONF1_WCC | SH7751_PCICONF1_PER | SH7751_PCICONF1_BUM | SH7751_PCICONF1_MES; - pci_write_reg(word, SH7751_PCICONF1); + pci_write_reg(chan, word, SH7751_PCICONF1); /* define this host as the host bridge */ word = PCI_BASE_CLASS_BRIDGE << 24; - pci_write_reg(word, SH7751_PCICONF2); + pci_write_reg(chan, word, SH7751_PCICONF2); /* Set IO and Mem windows to local address * Make PCI and local address the same for easy 1 to 1 mapping @@ -128,24 +130,24 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) * Window1 = map->window1.size @ cached area base = SDRAM */ word = map->window0.size - 1; - pci_write_reg(word, SH4_PCILSR0); + pci_write_reg(chan, word, SH4_PCILSR0); word = map->window1.size - 1; - pci_write_reg(word, SH4_PCILSR1); + pci_write_reg(chan, word, SH4_PCILSR1); /* Set the values on window 0 PCI config registers */ word = P2SEGADDR(map->window0.base); - pci_write_reg(word, SH4_PCILAR0); - pci_write_reg(word, SH7751_PCICONF5); + pci_write_reg(chan, word, SH4_PCILAR0); + pci_write_reg(chan, word, SH7751_PCICONF5); /* Set the values on window 1 PCI config registers */ word = PHYSADDR(map->window1.base); - pci_write_reg(word, SH4_PCILAR1); - pci_write_reg(word, SH7751_PCICONF6); + pci_write_reg(chan, word, SH4_PCILAR1); + pci_write_reg(chan, word, SH7751_PCICONF6); /* Set the local 16MB PCI memory space window to * the lowest PCI mapped address */ word = PCIBIOS_MIN_MEM & SH4_PCIMBR_MASK; pr_debug("PCI: Setting upper bits of Memory window to 0x%x\n", word); - pci_write_reg(word , SH4_PCIMBR); + pci_write_reg(chan, word , SH4_PCIMBR); /* Map IO space into PCI IO window * The IO window is 64K-PCIBIOS_MIN_IO in size @@ -160,19 +162,19 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) * correctly */ word = PCIBIOS_MIN_IO & SH4_PCIIOBR_MASK; pr_debug("PCI: Setting upper bits of IO window to 0x%x\n", word); - pci_write_reg(word, SH4_PCIIOBR); + pci_write_reg(chan, word, SH4_PCIIOBR); /* Set PCI WCRx, BCRx's, copy from BSC locations */ /* check BCR for SDRAM in specified area */ switch (map->window0.base) { - case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(0); break; - case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(1); break; - case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(2); break; - case SH7751_CS3_BASE_ADDR: word = __area_sdram_check(3); break; - case SH7751_CS4_BASE_ADDR: word = __area_sdram_check(4); break; - case SH7751_CS5_BASE_ADDR: word = __area_sdram_check(5); break; - case SH7751_CS6_BASE_ADDR: word = __area_sdram_check(6); break; + case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(chan, 0); break; + case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(chan, 1); break; + case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(chan, 2); break; + case SH7751_CS3_BASE_ADDR: word = __area_sdram_check(chan, 3); break; + case SH7751_CS4_BASE_ADDR: word = __area_sdram_check(chan, 4); break; + case SH7751_CS5_BASE_ADDR: word = __area_sdram_check(chan, 5); break; + case SH7751_CS6_BASE_ADDR: word = __area_sdram_check(chan, 6); break; } if (!word) @@ -180,25 +182,25 @@ int __init sh7751_pcic_init(struct sh4_pci_address_map *map) /* configure the wait control registers */ word = ctrl_inl(SH7751_WCR1); - pci_write_reg(word, SH4_PCIWCR1); + pci_write_reg(chan, word, SH4_PCIWCR1); word = ctrl_inl(SH7751_WCR2); - pci_write_reg(word, SH4_PCIWCR2); + pci_write_reg(chan, word, SH4_PCIWCR2); word = ctrl_inl(SH7751_WCR3); - pci_write_reg(word, SH4_PCIWCR3); + pci_write_reg(chan, word, SH4_PCIWCR3); word = ctrl_inl(SH7751_MCR); - pci_write_reg(word, SH4_PCIMCR); + pci_write_reg(chan, word, SH4_PCIMCR); /* NOTE: I'm ignoring the PCI error IRQs for now.. * TODO: add support for the internal error interrupts and * DMA interrupts... */ - pci_fixup_pcic(); + pci_fixup_pcic(chan); /* SH7751 init done, set central function init complete */ /* use round robin mode to stop a device starving/overruning */ word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM; - pci_write_reg(word, SH4_PCICR); + pci_write_reg(chan, word, SH4_PCICR); return 1; } diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h index 68e3cb5..6f101e5 100644 --- a/arch/sh/drivers/pci/pci-sh7751.h +++ b/arch/sh/drivers/pci/pci-sh7751.h @@ -130,6 +130,7 @@ struct sh4_pci_address_map; /* arch/sh/drivers/pci/pci-sh7751.c */ -int sh7751_pcic_init(struct sh4_pci_address_map *map); +int sh7751_pcic_init(struct pci_channel *chan, + struct sh4_pci_address_map *map); #endif /* _PCI_SH7751_H_ */ diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index bae6a2c..56f673f 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -55,7 +55,7 @@ static int __init sh7780_pci_init(void) ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */ /* check for SH7780/SH7780R hardware */ - id = pci_read_reg(SH7780_PCIVID); + id = pci_read_reg(NULL, SH7780_PCIVID); if ((id & 0xffff) == SH7780_VENDOR_ID) { switch ((id >> 16) & 0xffff) { case SH7763_DEVICE_ID: @@ -82,14 +82,15 @@ static int __init sh7780_pci_init(void) ctrl_outl(0x33333333, INTC_INTPRI); } - if ((ret = sh4_pci_check_direct()) != 0) + if ((ret = sh4_pci_check_direct(NULL)) != 0) return ret; return pcibios_init_platform(); } core_initcall(sh7780_pci_init); -int __init sh7780_pcic_init(struct sh4_pci_address_map *map) +int __init sh7780_pcic_init(struct pci_channel *chan, + struct sh4_pci_address_map *map) { u32 word; @@ -101,34 +102,34 @@ int __init sh7780_pcic_init(struct sh4_pci_address_map *map) if (!(map->flags & SH4_PCIC_NO_RESET)) { /* toggle PCI reset pin */ word = SH4_PCICR_PREFIX | SH4_PCICR_PRST; - pci_write_reg(word, SH4_PCICR); + pci_write_reg(chan, word, SH4_PCICR); /* Wait for a long time... not 1 sec. but long enough */ mdelay(100); word = SH4_PCICR_PREFIX; - pci_write_reg(word, SH4_PCICR); + pci_write_reg(chan, word, SH4_PCICR); } /* set the command/status bits to: * Wait Cycle Control + Parity Enable + Bus Master + * Mem space enable */ - pci_write_reg(0x00000046, SH7780_PCICMD); + pci_write_reg(chan, 0x00000046, SH7780_PCICMD); /* define this host as the host bridge */ word = PCI_BASE_CLASS_BRIDGE << 24; - pci_write_reg(word, SH7780_PCIRID); + pci_write_reg(chan, word, SH7780_PCIRID); /* Set IO and Mem windows to local address * Make PCI and local address the same for easy 1 to 1 mapping */ - pci_write_reg(map->window0.size - 0xfffff, SH4_PCILSR0); - pci_write_reg(map->window1.size - 0xfffff, SH4_PCILSR1); + pci_write_reg(chan, map->window0.size - 0xfffff, SH4_PCILSR0); + pci_write_reg(chan, map->window1.size - 0xfffff, SH4_PCILSR1); /* Set the values on window 0 PCI config registers */ - pci_write_reg(map->window0.base, SH4_PCILAR0); - pci_write_reg(map->window0.base, SH7780_PCIMBAR0); + pci_write_reg(chan, map->window0.base, SH4_PCILAR0); + pci_write_reg(chan, map->window0.base, SH7780_PCIMBAR0); /* Set the values on window 1 PCI config registers */ - pci_write_reg(map->window1.base, SH4_PCILAR1); - pci_write_reg(map->window1.base, SH7780_PCIMBAR1); + pci_write_reg(chan, map->window1.base, SH4_PCILAR1); + pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1); /* Map IO space into PCI IO window * The IO window is 64K-PCIBIOS_MIN_IO in size @@ -145,12 +146,12 @@ int __init sh7780_pcic_init(struct sh4_pci_address_map *map) */ /* Apply any last-minute PCIC fixups */ - pci_fixup_pcic(); + pci_fixup_pcic(chan); /* SH7780 init done, set central function init complete */ /* use round robin mode to stop a device starving/overruning */ word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO; - pci_write_reg(word, SH4_PCICR); + pci_write_reg(chan, word, SH4_PCICR); return 1; } diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index 93adc71..d349611 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -109,6 +109,7 @@ struct sh4_pci_address_map; /* arch/sh/drivers/pci/pci-sh7780.c */ -int sh7780_pcic_init(struct sh4_pci_address_map *map); +int sh7780_pcic_init(struct pci_channel *chan, + struct sh4_pci_address_map *map); #endif /* _PCI_SH7780_H_ */ -- cgit v0.10.2 From d0e3db40e2a1352aa2a2f425a7d4631bddc03d51 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 11 Mar 2009 15:46:14 +0900 Subject: sh: add init member to pci_channel data This patch adds an init callback to struct pci_channel and makes sure it is initialized properly. Code is added to call this init function from pcibios_init(). Return values are adjusted and a warning is is printed if init fails. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-dreamcast/setup.c b/arch/sh/boards/mach-dreamcast/setup.c index d1bee48..ebe9922 100644 --- a/arch/sh/boards/mach-dreamcast/setup.c +++ b/arch/sh/boards/mach-dreamcast/setup.c @@ -30,7 +30,6 @@ extern struct irq_chip systemasic_int; extern void aica_time_init(void); -extern int gapspci_init(void); extern int systemasic_irq_demux(int); static void __init dreamcast_setup(char **cmdline_p) @@ -51,11 +50,6 @@ static void __init dreamcast_setup(char **cmdline_p) handle_level_irq); board_time_init = aica_time_init; - -#ifdef CONFIG_PCI - if (gapspci_init() < 0) - printk(KERN_WARNING "GAPSPCI was not detected.\n"); -#endif } static struct sh_machine_vector mv_dreamcast __initmv = { diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c index 38ef762..f4a5e14 100644 --- a/arch/sh/drivers/pci/ops-cayman.c +++ b/arch/sh/drivers/pci/ops-cayman.c @@ -77,7 +77,7 @@ int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin) } struct pci_channel board_pci_channels[] = { - { &sh5_pci_ops, NULL, NULL, 0, 0xff }, + { sh5_pci_init, &sh5_pci_ops, NULL, NULL, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c index f5d2a2a..f62063e 100644 --- a/arch/sh/drivers/pci/ops-dreamcast.c +++ b/arch/sh/drivers/pci/ops-dreamcast.c @@ -42,15 +42,6 @@ static struct resource gapspci_mem_resource = { .flags = IORESOURCE_MEM, }; -static struct pci_ops gapspci_pci_ops; - -struct pci_channel board_pci_channels[] = { - { &gapspci_pci_ops, &gapspci_io_resource, - &gapspci_mem_resource, 0, 1 }, - { 0, } -}; -EXPORT_SYMBOL(board_pci_channels); - /* * The !gapspci_config_access case really shouldn't happen, ever, unless * someone implicitly messes around with the last devfn value.. otherwise we @@ -116,7 +107,7 @@ static struct pci_ops gapspci_pci_ops = { * gapspci init */ -int __init gapspci_init(void) +static int __init gapspci_init(struct pci_channel *chan) { char idbuf[16]; int i; @@ -168,3 +159,10 @@ char * __devinit pcibios_setup(char *str) { return str; } + +struct pci_channel board_pci_channels[] = { + { gapspci_init, &gapspci_pci_ops, &gapspci_io_resource, + &gapspci_mem_resource, 0, 1 }, + { 0, } +}; +EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c index 343c072..c46911d9 100644 --- a/arch/sh/drivers/pci/ops-landisk.c +++ b/arch/sh/drivers/pci/ops-landisk.c @@ -30,7 +30,7 @@ static struct resource sh7751_mem_resource = { }; struct pci_channel board_pci_channels[] = { - {&sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0x3ff}, + { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0x3ff}, {NULL, NULL, NULL, 0, 0}, }; diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c index 8bff32a..f606df2 100644 --- a/arch/sh/drivers/pci/ops-lboxre2.c +++ b/arch/sh/drivers/pci/ops-lboxre2.c @@ -39,7 +39,7 @@ static struct resource sh7751_mem_resource = { extern struct pci_ops sh7751_pci_ops; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, + { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c index bf32ee8..b51b7e4 100644 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ b/arch/sh/drivers/pci/ops-r7780rp.c @@ -43,7 +43,7 @@ static struct resource sh7780_mem_resource = { extern struct pci_ops sh7780_pci_ops; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff }, + { sh7780_pci_init, &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c index e4208a6..fe5a231 100644 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ b/arch/sh/drivers/pci/ops-rts7751r2d.c @@ -47,7 +47,7 @@ static struct resource sh7751_mem_resource = { extern struct pci_ops sh7751_pci_ops; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, + { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c index 21d59d4..7277cd1 100644 --- a/arch/sh/drivers/pci/ops-sdk7780.c +++ b/arch/sh/drivers/pci/ops-sdk7780.c @@ -49,7 +49,7 @@ static struct resource sdk7780_mem_resource = { }; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff }, + { sh7780_pci_init, &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c index 78a6f2b..76a74fb 100644 --- a/arch/sh/drivers/pci/ops-se7780.c +++ b/arch/sh/drivers/pci/ops-se7780.c @@ -58,7 +58,7 @@ static struct resource se7780_mem_resource = { extern struct pci_ops se7780_pci_ops; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff }, + { sh7780_pci_init, &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/ops-sh03.c b/arch/sh/drivers/pci/ops-sh03.c index e1703ff..0218135 100644 --- a/arch/sh/drivers/pci/ops-sh03.c +++ b/arch/sh/drivers/pci/ops-sh03.c @@ -39,7 +39,7 @@ static struct resource sh7751_mem_resource = { extern struct pci_ops sh4_pci_ops; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, + { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c index cba8015..2e254c6 100644 --- a/arch/sh/drivers/pci/ops-snapgear.c +++ b/arch/sh/drivers/pci/ops-snapgear.c @@ -40,7 +40,7 @@ static struct resource sh7751_mem_resource = { }; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, + { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, { 0, } }; diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c index 69fcc5c..ffa79bd 100644 --- a/arch/sh/drivers/pci/ops-titan.c +++ b/arch/sh/drivers/pci/ops-titan.c @@ -52,7 +52,7 @@ static struct resource sh7751_mem_resource = { }; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, + { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c index 7a97438..008a02e 100644 --- a/arch/sh/drivers/pci/pci-sh5.c +++ b/arch/sh/drivers/pci/pci-sh5.c @@ -27,6 +27,12 @@ unsigned long pcicr_virt; unsigned long PCI_IO_AREA; +int __init sh5_pci_init(struct pci_channel *chan) +{ + pr_debug("PCI: Starting intialization.\n"); + return pcibios_init_platform(); +} + /* Rounds a number UP to the nearest power of two. Used for * sizing the PCI window. */ diff --git a/arch/sh/drivers/pci/pci-sh5.h b/arch/sh/drivers/pci/pci-sh5.h index 7cff3fc..af09f38 100644 --- a/arch/sh/drivers/pci/pci-sh5.h +++ b/arch/sh/drivers/pci/pci-sh5.h @@ -108,6 +108,7 @@ extern unsigned long pcicr_virt; extern struct pci_ops sh5_pci_ops; /* arch/sh/drivers/pci/pci-sh5.c */ +int sh5_pci_init(struct pci_channel *chan); int sh5pci_init(unsigned long memStart, unsigned long memSize); #endif /* __PCI_SH5_H */ diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 9c2c014..230db8b 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -32,7 +32,7 @@ * space mapping) will be called via the platform defined function * pcibios_init_platform(). */ -static int __init sh7751_pci_init(void) +int __init sh7751_pci_init(struct pci_channel *chan) { unsigned int id; int ret; @@ -40,19 +40,18 @@ static int __init sh7751_pci_init(void) pr_debug("PCI: Starting intialization.\n"); /* check for SH7751/SH7751R hardware */ - id = pci_read_reg(NULL, SH7751_PCICONF0); + id = pci_read_reg(chan, SH7751_PCICONF0); if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) && id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) { pr_debug("PCI: This is not an SH7751(R) (%x)\n", id); return -ENODEV; } - if ((ret = sh4_pci_check_direct(NULL)) != 0) + if ((ret = sh4_pci_check_direct(chan)) != 0) return ret; return pcibios_init_platform(); } -subsys_initcall(sh7751_pci_init); static int __init __area_sdram_check(struct pci_channel *chan, unsigned int area) @@ -178,7 +177,7 @@ int __init sh7751_pcic_init(struct pci_channel *chan, } if (!word) - return 0; + return -1; /* configure the wait control registers */ word = ctrl_inl(SH7751_WCR1); @@ -202,5 +201,5 @@ int __init sh7751_pcic_init(struct pci_channel *chan, word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM; pci_write_reg(chan, word, SH4_PCICR); - return 1; + return 0; } diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h index 6f101e5..0ea4387 100644 --- a/arch/sh/drivers/pci/pci-sh7751.h +++ b/arch/sh/drivers/pci/pci-sh7751.h @@ -130,6 +130,7 @@ struct sh4_pci_address_map; /* arch/sh/drivers/pci/pci-sh7751.c */ +int sh7751_pci_init(struct pci_channel *chan); int sh7751_pcic_init(struct pci_channel *chan, struct sh4_pci_address_map *map); diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 56f673f..4706e88 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -45,7 +45,7 @@ * space mapping) will be called via the platform defined function * pcibios_init_platform(). */ -static int __init sh7780_pci_init(void) +int __init sh7780_pci_init(struct pci_channel *chan) { unsigned int id; int ret, match = 0; @@ -55,7 +55,7 @@ static int __init sh7780_pci_init(void) ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */ /* check for SH7780/SH7780R hardware */ - id = pci_read_reg(NULL, SH7780_PCIVID); + id = pci_read_reg(chan, SH7780_PCIVID); if ((id & 0xffff) == SH7780_VENDOR_ID) { switch ((id >> 16) & 0xffff) { case SH7763_DEVICE_ID: @@ -82,12 +82,11 @@ static int __init sh7780_pci_init(void) ctrl_outl(0x33333333, INTC_INTPRI); } - if ((ret = sh4_pci_check_direct(NULL)) != 0) + if ((ret = sh4_pci_check_direct(chan)) != 0) return ret; return pcibios_init_platform(); } -core_initcall(sh7780_pci_init); int __init sh7780_pcic_init(struct pci_channel *chan, struct sh4_pci_address_map *map) @@ -153,5 +152,5 @@ int __init sh7780_pcic_init(struct pci_channel *chan, word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO; pci_write_reg(chan, word, SH4_PCICR); - return 1; + return 0; } diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index d349611..2f3c920 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -109,6 +109,7 @@ struct sh4_pci_address_map; /* arch/sh/drivers/pci/pci-sh7780.c */ +int sh7780_pci_init(struct pci_channel *chan); int sh7780_pcic_init(struct pci_channel *chan, struct sh4_pci_address_map *map); diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 0d6ac7a..29ec16e 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -28,18 +28,31 @@ static int __init pcibios_init(void) struct pci_bus *bus; int busno; + /* init channels */ + busno = 0; + for (p = board_pci_channels; p->init; p++) { + if (p->init(p) == 0) + p->enabled = 1; + else + pr_err("Unable to init pci channel %d\n", busno); + busno++; + } + #ifdef CONFIG_PCI_AUTO /* assign resources */ busno = 0; - for (p = board_pci_channels; p->pci_ops != NULL; p++) - busno = pciauto_assign_resources(busno, p) + 1; + for (p = board_pci_channels; p->init; p++) + if (p->enabled) + busno = pciauto_assign_resources(busno, p) + 1; #endif /* scan the buses */ busno = 0; - for (p = board_pci_channels; p->pci_ops != NULL; p++) { - bus = pci_scan_bus(busno, p->pci_ops, p); - busno = bus->subordinate + 1; + for (p = board_pci_channels; p->init; p++) { + if (p->enabled) { + bus = pci_scan_bus(busno, p->pci_ops, p); + busno = bus->subordinate + 1; + } } pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index df1d383..5c7a8f1 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -17,11 +17,13 @@ * external) PCI controllers. */ struct pci_channel { + int (*init)(struct pci_channel *chan); struct pci_ops *pci_ops; struct resource *io_resource; struct resource *mem_resource; int first_devfn; int last_devfn; + int enabled; }; /* -- cgit v0.10.2 From 710fa3c81151948ac4d836ef52b57cef91b0ab72 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 11 Mar 2009 15:47:23 +0900 Subject: sh: avoid using PCIBIOS_MIN_xxx Replaces PCIBIOS_MIN_IO and PCIBIOS_MIN_MEM with direct struct pci_channel access. This allows us to have more than one pci channel. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c index 5b25021..31f8dfb 100644 --- a/arch/sh/drivers/pci/fixups-r7780rp.c +++ b/arch/sh/drivers/pci/fixups-r7780rp.c @@ -37,7 +37,7 @@ int pci_fixup_pcic(struct pci_channel *chan) #endif /* Set IOBR for windows containing area specified in pci.h */ - pci_write_reg(chan, (PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)), + pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR); pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)), SH7780_PCIIOBMR); diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c index 3f6754a..c295731 100644 --- a/arch/sh/drivers/pci/fixups-sdk7780.c +++ b/arch/sh/drivers/pci/fixups-sdk7780.c @@ -50,7 +50,7 @@ int pci_fixup_pcic(struct pci_channel *chan) pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ /* Set IOBR for window containing area specified in pci.h */ - pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), + pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR); pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR); diff --git a/arch/sh/drivers/pci/fixups-se7780.c b/arch/sh/drivers/pci/fixups-se7780.c index b8e735e..a968af7 100644 --- a/arch/sh/drivers/pci/fixups-se7780.c +++ b/arch/sh/drivers/pci/fixups-se7780.c @@ -51,7 +51,7 @@ int pci_fixup_pcic(struct pci_channel *chan) pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ /* Set IOBR for window containing area specified in pci.h */ - pci_write_reg(chan, PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE-1), + pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1), SH7780_PCIIOBR); pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18), SH7780_PCIIOBMR); diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c index 008a02e..7750da2 100644 --- a/arch/sh/drivers/pci/pci-sh5.c +++ b/arch/sh/drivers/pci/pci-sh5.c @@ -206,6 +206,9 @@ int __init sh5pci_init(unsigned long memStart, unsigned long memSize) return 0; } +#define xPCIBIOS_MIN_IO board_pci_channels->io_resource->start +#define xPCIBIOS_MIN_MEM board_pci_channels->mem_resource->start + void __devinit pcibios_fixup_bus(struct pci_bus *bus) { struct pci_dev *dev = bus->self; @@ -223,9 +226,9 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) /* For now, propagate host limits to the bus; * we'll adjust them later. */ bus->resource[0]->end = 64*1024 - 1 ; - bus->resource[1]->end = PCIBIOS_MIN_MEM+(256*1024*1024)-1; - bus->resource[0]->start = PCIBIOS_MIN_IO; - bus->resource[1]->start = PCIBIOS_MIN_MEM; + bus->resource[1]->end = xPCIBIOS_MIN_MEM+(256*1024*1024)-1; + bus->resource[0]->start = xPCIBIOS_MIN_IO; + bus->resource[1]->start = xPCIBIOS_MIN_MEM; /* Turn off downstream PF memory address range by default */ bus->resource[2]->start = 1024*1024; diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 230db8b..447234c 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -144,22 +144,20 @@ int __init sh7751_pcic_init(struct pci_channel *chan, /* Set the local 16MB PCI memory space window to * the lowest PCI mapped address */ - word = PCIBIOS_MIN_MEM & SH4_PCIMBR_MASK; + word = chan->mem_resource->start & SH4_PCIMBR_MASK; pr_debug("PCI: Setting upper bits of Memory window to 0x%x\n", word); pci_write_reg(chan, word , SH4_PCIMBR); - /* Map IO space into PCI IO window - * The IO window is 64K-PCIBIOS_MIN_IO in size - * IO addresses will be translated to the - * PCI IO window base address + /* Map IO space into PCI IO window: + * IO addresses will be translated to the PCI IO window base address */ pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n", - PCIBIOS_MIN_IO, (64 << 10), - SH7751_PCI_IO_BASE + PCIBIOS_MIN_IO); + chan->io_resource->start, chan->io_resource->end, + SH7751_PCI_IO_BASE + chan->io_resource->start); /* Make sure the MSB's of IO window are set to access PCI space * correctly */ - word = PCIBIOS_MIN_IO & SH4_PCIIOBR_MASK; + word = chan->io_resource->start & SH4_PCIIOBR_MASK; pr_debug("PCI: Setting upper bits of IO window to 0x%x\n", word); pci_write_reg(chan, word, SH4_PCIIOBR); diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 4706e88..e8f3a30 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -130,14 +130,12 @@ int __init sh7780_pcic_init(struct pci_channel *chan, pci_write_reg(chan, map->window1.base, SH4_PCILAR1); pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1); - /* Map IO space into PCI IO window - * The IO window is 64K-PCIBIOS_MIN_IO in size - * IO addresses will be translated to the - * PCI IO window base address + /* Map IO space into PCI IO window: + * IO addresses will be translated to the PCI IO window base address */ pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n", - PCIBIOS_MIN_IO, (64 << 10), - SH7780_PCI_IO_BASE + PCIBIOS_MIN_IO); + chan->io_resource->start, chan->io_resource->end, + SH7780_PCI_IO_BASE + chan->io_resource->start); /* NOTE: I'm ignoring the PCI error IRQs for now.. * TODO: add support for the internal error interrupts and -- cgit v0.10.2 From b6706ef10f75921733d7275fd45d268f2f6254c8 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 19 Feb 2008 21:34:55 +0900 Subject: sh: hook in struct pci_channel in sysdata Store a struct pci_channel pointer in bus->sysdata. This makes whatever struct pci_channel assigned to a bus available for sh4_pci_read() and sh4_pci_write(). We also modify PCIBIOS_MIN_IO and PCIBIOS_MIN_MEM to use bus->sysdata - this to gives us support for multiple pci channels. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c index 92d27f7..ee62e6d 100644 --- a/arch/sh/drivers/pci/ops-sh4.c +++ b/arch/sh/drivers/pci/ops-sh4.c @@ -26,6 +26,7 @@ static DEFINE_SPINLOCK(sh4_pci_lock); static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { + struct pci_channel *chan = bus->sysdata; unsigned long flags; u32 data; @@ -34,8 +35,8 @@ static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn, * so we must do byte alignment by hand */ spin_lock_irqsave(&sh4_pci_lock, flags); - pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); - data = pci_read_reg(NULL, SH4_PCIPDR); + pci_write_reg(chan, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); + data = pci_read_reg(chan, SH4_PCIPDR); spin_unlock_irqrestore(&sh4_pci_lock, flags); switch (size) { @@ -63,13 +64,14 @@ static int sh4_pci_read(struct pci_bus *bus, unsigned int devfn, static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { + struct pci_channel *chan = bus->sysdata; unsigned long flags; int shift; u32 data; spin_lock_irqsave(&sh4_pci_lock, flags); - pci_write_reg(NULL, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); - data = pci_read_reg(NULL, SH4_PCIPDR); + pci_write_reg(chan, CONFIG_CMD(bus, devfn, where), SH4_PCIPAR); + data = pci_read_reg(chan, SH4_PCIPDR); spin_unlock_irqrestore(&sh4_pci_lock, flags); switch (size) { @@ -90,7 +92,7 @@ static int sh4_pci_write(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_FUNC_NOT_SUPPORTED; } - pci_write_reg(NULL, data, SH4_PCIPDR); + pci_write_reg(chan, data, SH4_PCIPDR); return PCIBIOS_SUCCESSFUL; } diff --git a/arch/sh/drivers/pci/pci-auto.c b/arch/sh/drivers/pci/pci-auto.c index cf48b12..1d715ec 100644 --- a/arch/sh/drivers/pci/pci-auto.c +++ b/arch/sh/drivers/pci/pci-auto.c @@ -67,6 +67,7 @@ static struct pci_dev *fake_pci_dev(struct pci_channel *hose, dev.devfn = devfn; bus.number = busnr; bus.ops = hose->pci_ops; + bus.sysdata = hose; if(busnr != top_bus) /* Fake a parent bus structure. */ diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 5c7a8f1..386587e 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -31,8 +31,10 @@ struct pci_channel { */ extern struct pci_channel board_pci_channels[]; -#define PCIBIOS_MIN_IO board_pci_channels->io_resource->start -#define PCIBIOS_MIN_MEM board_pci_channels->mem_resource->start +/* ugly as hell, but makes drivers/pci/setup-res.c compile and work */ +#define __PCI_CHAN(bus) ((struct pci_channel *)bus->sysdata) +#define PCIBIOS_MIN_IO __PCI_CHAN(bus)->io_resource->start +#define PCIBIOS_MIN_MEM __PCI_CHAN(bus)->mem_resource->start /* * I/O routine helpers -- cgit v0.10.2 From e4c6a3604e07185046e2ce4be82a201f4447d788 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 19 Feb 2008 21:35:04 +0900 Subject: sh: add reg_base member to pci_channel Store the base address of the pci host controller registers in struct pci_channel and use the address in pci_read_reg() and pci_write_reg(). Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c index ee62e6d..540683d 100644 --- a/arch/sh/drivers/pci/ops-sh4.c +++ b/arch/sh/drivers/pci/ops-sh4.c @@ -121,8 +121,8 @@ int __init sh4_pci_check_direct(struct pci_channel *chan) if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) { pci_write_reg(chan, tmp, SH4_PCIPAR); printk(KERN_INFO "PCI: Using configuration type 1\n"); - request_region(PCI_REG(SH4_PCIPAR), 8, "PCI conf1"); - + request_region(chan->reg_base + SH4_PCIPAR, 8, + "PCI conf1"); return 0; } diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h index 62ba350..90abfe3 100644 --- a/arch/sh/drivers/pci/pci-sh4.h +++ b/arch/sh/drivers/pci/pci-sh4.h @@ -171,13 +171,13 @@ struct sh4_pci_address_map { static inline void pci_write_reg(struct pci_channel *chan, unsigned long val, unsigned long reg) { - ctrl_outl(val, PCI_REG(reg)); + ctrl_outl(val, chan->reg_base + reg); } static inline unsigned long pci_read_reg(struct pci_channel *chan, unsigned long reg) { - return ctrl_inl(PCI_REG(reg)); + return ctrl_inl(chan->reg_base + reg); } #endif /* __PCI_SH4_H */ diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 447234c..201266b 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -39,6 +39,8 @@ int __init sh7751_pci_init(struct pci_channel *chan) pr_debug("PCI: Starting intialization.\n"); + chan->reg_base = 0xfe200000; + /* check for SH7751/SH7751R hardware */ id = pci_read_reg(chan, SH7751_PCICONF0); if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) && diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h index 0ea4387..c390dd2 100644 --- a/arch/sh/drivers/pci/pci-sh7751.h +++ b/arch/sh/drivers/pci/pci-sh7751.h @@ -26,7 +26,6 @@ #define SH7751_PCI_IO_SIZE 0x40000 /* Size of IO window */ #define SH7751_PCIREG_BASE 0xFE200000 /* PCI regs base address */ -#define PCI_REG(n) (SH7751_PCIREG_BASE+ n) #define SH7751_PCICONF0 0x0 /* PCI Config Reg 0 */ #define SH7751_PCICONF0_DEVID 0xFFFF0000 /* Device ID */ diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index e8f3a30..9d6483a 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -52,6 +52,8 @@ int __init sh7780_pci_init(struct pci_channel *chan) pr_debug("PCI: Starting intialization.\n"); + chan->reg_base = 0xfe040000; + ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */ /* check for SH7780/SH7780R hardware */ diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index 2f3c920..fffcf1d 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -35,7 +35,6 @@ #define SH7780_PCI_IO_SIZE 0x00400000 /* Size of IO window */ #define SH7780_PCIREG_BASE 0xFE040000 /* PCI regs base address */ -#define PCI_REG(n) (SH7780_PCIREG_BASE+n) /* SH7780 PCI Config Registers */ #define SH7780_PCIVID 0x000 /* Vendor ID */ diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 386587e..8e9e0ed 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -24,6 +24,7 @@ struct pci_channel { int first_devfn; int last_devfn; int enabled; + unsigned long reg_base; }; /* -- cgit v0.10.2 From ef53fdeb7e0cb139aff33665635b886700137abb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 19 Feb 2008 21:35:14 +0900 Subject: sh: add io_base member to pci_channel Store the io window base address in struct pci_channel and use that one instead of SH77xx_PCI_IO_BASE. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 201266b..2a6c7aa 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -40,6 +40,7 @@ int __init sh7751_pci_init(struct pci_channel *chan) pr_debug("PCI: Starting intialization.\n"); chan->reg_base = 0xfe200000; + chan->io_base = 0xfe240000; /* check for SH7751/SH7751R hardware */ id = pci_read_reg(chan, SH7751_PCICONF0); @@ -153,9 +154,9 @@ int __init sh7751_pcic_init(struct pci_channel *chan, /* Map IO space into PCI IO window: * IO addresses will be translated to the PCI IO window base address */ - pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n", + pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n", chan->io_resource->start, chan->io_resource->end, - SH7751_PCI_IO_BASE + chan->io_resource->start); + chan->io_base + chan->io_resource->start); /* Make sure the MSB's of IO window are set to access PCI space * correctly */ diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 9d6483a..87a7f3b 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -53,6 +53,7 @@ int __init sh7780_pci_init(struct pci_channel *chan) pr_debug("PCI: Starting intialization.\n"); chan->reg_base = 0xfe040000; + chan->io_base = 0xfe200000; ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */ @@ -135,9 +136,9 @@ int __init sh7780_pcic_init(struct pci_channel *chan, /* Map IO space into PCI IO window: * IO addresses will be translated to the PCI IO window base address */ - pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%x\n", + pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n", chan->io_resource->start, chan->io_resource->end, - SH7780_PCI_IO_BASE + chan->io_resource->start); + chan->io_base + chan->io_resource->start); /* NOTE: I'm ignoring the PCI error IRQs for now.. * TODO: add support for the internal error interrupts and diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 8e9e0ed..84d12eb 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -25,6 +25,7 @@ struct pci_channel { int last_devfn; int enabled; unsigned long reg_base; + unsigned long io_base; }; /* -- cgit v0.10.2 From ef339f241b08a16af58897e6288ba200e0c7a8c7 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 19 Feb 2008 21:35:22 +0900 Subject: sh: pci memory range checking code This patch changes the code to use __is_pci_memory() instead of is_pci_memaddr(). __is_pci_memory() loops through all the pci channels on the system to match memory windows. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-titan/io.c b/arch/sh/boards/mach-titan/io.c index 4badad4..053b3ed 100644 --- a/arch/sh/boards/mach-titan/io.c +++ b/arch/sh/boards/mach-titan/io.c @@ -117,7 +117,7 @@ void titan_outsl(unsigned long port, const void *src, unsigned long count) void __iomem *titan_ioport_map(unsigned long port, unsigned int size) { - if (PXSEG(port) || is_pci_memaddr(port)) + if (PXSEG(port)) return (void __iomem *)port; else if (is_pci_ioaddr(port)) return (void __iomem *)pci_ioaddr(port); diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 29ec16e..b9aca54 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -167,9 +167,8 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) /* * Presently the IORESOURCE_MEM case is a bit special, most * SH7751 style PCI controllers have PCI memory at a fixed - * location in the address space where no remapping is desired - * (typically at 0xfd000000, but is_pci_memaddr() will know - * best). With the IORESOURCE_MEM case more care has to be taken + * location in the address space where no remapping is desired. + * With the IORESOURCE_MEM case more care has to be taken * to inhibit page table mapping for legacy cores, but this is * punted off to __ioremap(). * -- PFM. diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 84d12eb..ccf5c5f 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -61,12 +61,8 @@ extern unsigned long PCI_IO_AREA; #define is_pci_ioaddr(port) \ (((port) >= PCIBIOS_MIN_IO) && \ ((port) < (PCIBIOS_MIN_IO + PCI_IO_SIZE))) -#define is_pci_memaddr(port) \ - (((port) >= PCIBIOS_MIN_MEM) && \ - ((port) < (PCIBIOS_MIN_MEM + PCI_MEM_SIZE))) #else #define is_pci_ioaddr(port) (0) -#define is_pci_memaddr(port) (0) #endif struct pci_dev; @@ -127,6 +123,25 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, *strat = PCI_DMA_BURST_INFINITY; *strategy_parameter = ~0UL; } + +static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) +{ + struct pci_channel *p; + struct resource *res; + + for (p = board_pci_channels; p->init; p++) { + res = p->mem_resource; + if (p->enabled && (phys_addr >= res->start) && + (phys_addr + size) <= (res->end + 1)) + return 1; + } + return 0; +} +#else +static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) +{ + return 0; +} #endif /* Board-specific fixup routines. */ diff --git a/arch/sh/mm/ioremap_32.c b/arch/sh/mm/ioremap_32.c index 60cc486..7e04cc8 100644 --- a/arch/sh/mm/ioremap_32.c +++ b/arch/sh/mm/ioremap_32.c @@ -56,7 +56,7 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, * P1/P2 space, ioremap() will already do the right thing, * and we'll never get this far. */ - if (is_pci_memaddr(phys_addr) && is_pci_memaddr(last_addr)) + if (__is_pci_memory(phys_addr, size)) return (void __iomem *)phys_addr; #if !defined(CONFIG_PMB_FIXED) @@ -121,7 +121,7 @@ void __iounmap(void __iomem *addr) unsigned long seg = PXSEG(vaddr); struct vm_struct *p; - if (seg < P3SEG || vaddr >= P3_ADDR_MAX || is_pci_memaddr(vaddr)) + if (seg < P3SEG || vaddr >= P3_ADDR_MAX || __is_pci_memory(vaddr, 0)) return; #ifdef CONFIG_PMB -- cgit v0.10.2 From 8ce0143b11cdc519b8e1fd94a262b654ef0bc3ab Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 19 Feb 2008 21:35:31 +0900 Subject: sh: pci io port base address code Adds a __get_pci_io_base() function which is used to match a port range against struct pci_channel. This allows us to detect if a port range is assigned to pci or happens to be legacy port io. While at it, remove unused cpu-specific cruft. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c index fe5a231..58ed1d5 100644 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ b/arch/sh/drivers/pci/ops-rts7751r2d.c @@ -68,7 +68,6 @@ static struct sh4_pci_address_map sh7751_pci_map = { int __init pcibios_init_platform(void) { - __set_io_port_base(SH7751_PCI_IO_BASE); return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); } diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index ccf5c5f..bb2c2fc 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -38,33 +38,6 @@ extern struct pci_channel board_pci_channels[]; #define PCIBIOS_MIN_IO __PCI_CHAN(bus)->io_resource->start #define PCIBIOS_MIN_MEM __PCI_CHAN(bus)->mem_resource->start -/* - * I/O routine helpers - */ -#if defined(CONFIG_CPU_SUBTYPE_SH7780) || defined(CONFIG_CPU_SUBTYPE_SH7785) -#define PCI_IO_AREA 0xFE400000 -#define PCI_IO_SIZE 0x00400000 -#elif defined(CONFIG_CPU_SH5) -extern unsigned long PCI_IO_AREA; -#define PCI_IO_SIZE 0x00010000 -#else -#define PCI_IO_AREA 0xFE240000 -#define PCI_IO_SIZE 0x00040000 -#endif - -#define PCI_MEM_SIZE 0x01000000 - -#define SH4_PCIIOBR_MASK 0xFFFC0000 -#define pci_ioaddr(addr) (PCI_IO_AREA + (addr & ~SH4_PCIIOBR_MASK)) - -#if defined(CONFIG_PCI) -#define is_pci_ioaddr(port) \ - (((port) >= PCIBIOS_MIN_IO) && \ - ((port) < (PCIBIOS_MIN_IO + PCI_IO_SIZE))) -#else -#define is_pci_ioaddr(port) (0) -#endif - struct pci_dev; extern void pcibios_set_master(struct pci_dev *dev); @@ -137,11 +110,31 @@ static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) } return 0; } + +static inline void __iomem *__get_pci_io_base(unsigned long port, + unsigned long size) +{ + struct pci_channel *p; + struct resource *res; + + for (p = board_pci_channels; p->init; p++) { + res = p->io_resource; + if (p->enabled && (port >= res->start) && + (port + size) <= (res->end + 1)) + return (void __iomem *)(p->io_base + port); + } + return NULL; +} #else static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) { return 0; } +static inline void __iomem *__get_pci_io_base(unsigned long port, + unsigned long size) +{ + return NULL; +} #endif /* Board-specific fixup routines. */ diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c index 29cf458..59fb020 100644 --- a/arch/sh/kernel/io.c +++ b/arch/sh/kernel/io.c @@ -12,6 +12,7 @@ * for more details. */ #include +#include #include #include @@ -69,6 +70,10 @@ void __iomem *ioport_map(unsigned long port, unsigned int nr) if (ret) return ret; + ret = __get_pci_io_base(port, nr); + if (ret) + return ret; + return __ioport_map(port, nr); } EXPORT_SYMBOL(ioport_map); -- cgit v0.10.2 From aa5d3ff99cc1f3dfe262ab9dd9179131fcfe39fd Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 19 Feb 2008 21:35:40 +0900 Subject: sh: export board_pci_channels in one place Instead of sometimes exporting board_pci_channels[] in the board specific code just export it in one place. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c index f4a5e14..cbaaf02 100644 --- a/arch/sh/drivers/pci/ops-cayman.c +++ b/arch/sh/drivers/pci/ops-cayman.c @@ -80,7 +80,6 @@ struct pci_channel board_pci_channels[] = { { sh5_pci_init, &sh5_pci_ops, NULL, NULL, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); int __init pcibios_init_platform(void) { diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c index f606df2..781496b 100644 --- a/arch/sh/drivers/pci/ops-lboxre2.c +++ b/arch/sh/drivers/pci/ops-lboxre2.c @@ -43,8 +43,6 @@ struct pci_channel board_pci_channels[] = { { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); - static struct sh4_pci_address_map sh7751_pci_map = { .window0 = { .base = SH7751_CS3_BASE_ADDR, diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c index b51b7e4..c58f1cf 100644 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ b/arch/sh/drivers/pci/ops-r7780rp.c @@ -46,7 +46,6 @@ struct pci_channel board_pci_channels[] = { { sh7780_pci_init, &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); static struct sh4_pci_address_map sh7780_pci_map = { .window0 = { diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c index 58ed1d5..d374cd3 100644 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ b/arch/sh/drivers/pci/ops-rts7751r2d.c @@ -50,7 +50,6 @@ struct pci_channel board_pci_channels[] = { { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); static struct sh4_pci_address_map sh7751_pci_map = { .window0 = { diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c index 7277cd1..b34fbc5 100644 --- a/arch/sh/drivers/pci/ops-sdk7780.c +++ b/arch/sh/drivers/pci/ops-sdk7780.c @@ -52,7 +52,6 @@ struct pci_channel board_pci_channels[] = { { sh7780_pci_init, &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); static struct sh4_pci_address_map sdk7780_pci_map = { .window0 = { diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c index 76a74fb..4730207 100644 --- a/arch/sh/drivers/pci/ops-se7780.c +++ b/arch/sh/drivers/pci/ops-se7780.c @@ -61,7 +61,6 @@ struct pci_channel board_pci_channels[] = { { sh7780_pci_init, &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); static struct sh4_pci_address_map se7780_pci_map = { .window0 = { diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c index ffa79bd..31ed037 100644 --- a/arch/sh/drivers/pci/ops-titan.c +++ b/arch/sh/drivers/pci/ops-titan.c @@ -55,7 +55,6 @@ struct pci_channel board_pci_channels[] = { { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); static struct sh4_pci_address_map sh7751_pci_map = { .window0 = { diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index b9aca54..8aaacc9 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -187,3 +187,5 @@ void pci_iounmap(struct pci_dev *dev, void __iomem *addr) iounmap(addr); } EXPORT_SYMBOL(pci_iounmap); + +EXPORT_SYMBOL(board_pci_channels); -- cgit v0.10.2 From 10591578c84825a320734e59272f161385edcc41 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 11 Mar 2009 15:58:04 +0900 Subject: sh: drop duplicate symbol export on dreamcast and sh7785lcr. With board_pci_channels now being exported in a single place, update the boards that duplicated the export. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c index f62063e..5bc81d5 100644 --- a/arch/sh/drivers/pci/ops-dreamcast.c +++ b/arch/sh/drivers/pci/ops-dreamcast.c @@ -165,4 +165,3 @@ struct pci_channel board_pci_channels[] = { &gapspci_mem_resource, 0, 1 }, { 0, } }; -EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c index fb0869f..2a9f9a5 100644 --- a/arch/sh/drivers/pci/ops-sh7785lcr.c +++ b/arch/sh/drivers/pci/ops-sh7785lcr.c @@ -44,7 +44,6 @@ struct pci_channel board_pci_channels[] = { { &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; -EXPORT_SYMBOL(board_pci_channels); static struct sh4_pci_address_map sh7785_pci_map = { .window0 = { -- cgit v0.10.2 From 3aabce8d3d2f9af2c08c2f590ac9acb272ca8c95 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 11 Mar 2009 16:12:39 +0900 Subject: sh: sh7785lcr: Update for recent PCI changes. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/fixups-sh7785lcr.c b/arch/sh/drivers/pci/fixups-sh7785lcr.c index 4949e60..9e7dc79 100644 --- a/arch/sh/drivers/pci/fixups-sh7785lcr.c +++ b/arch/sh/drivers/pci/fixups-sh7785lcr.c @@ -15,32 +15,33 @@ #include #include "pci-sh4.h" -int pci_fixup_pcic(void) +int pci_fixup_pcic(struct pci_channel *chan) { - pci_write_reg(0x000043ff, SH4_PCIINTM); - pci_write_reg(0x0000380f, SH4_PCIAINTM); + pci_write_reg(chan, 0x000043ff, SH4_PCIINTM); + pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); - pci_write_reg(0xfbb00047, SH7780_PCICMD); - pci_write_reg(0x00000000, SH7780_PCIIBAR); + pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD); + pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR); - pci_write_reg(0x00011912, SH7780_PCISVID); - pci_write_reg(0x08000000, SH7780_PCICSCR0); - pci_write_reg(0x0000001b, SH7780_PCICSAR0); - pci_write_reg(0xfd000000, SH7780_PCICSCR1); - pci_write_reg(0x0000000f, SH7780_PCICSAR1); + pci_write_reg(chan, 0x00011912, SH7780_PCISVID); + pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0); + pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0); + pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1); + pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1); - pci_write_reg(0xfd000000, SH7780_PCIMBR0); - pci_write_reg(0x00fc0000, SH7780_PCIMBMR0); + pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0); + pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0); #ifdef CONFIG_32BIT - pci_write_reg(0xc0000000, SH7780_PCIMBR2); - pci_write_reg(0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2); + pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2); + pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2); #endif /* Set IOBR for windows containing area specified in pci.h */ - pci_write_reg((PCIBIOS_MIN_IO & ~(SH7780_PCI_IO_SIZE - 1)), + pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE - 1), SH7780_PCIIOBR); - pci_write_reg(((SH7780_PCI_IO_SIZE - 1) & (7 << 18)), SH7780_PCIIOBMR); + pci_write_reg(chan, ((SH7780_PCI_IO_SIZE - 1) & (7 << 18)), + SH7780_PCIIOBMR); return 0; } diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c index 2a9f9a5..afbb9bd 100644 --- a/arch/sh/drivers/pci/ops-sh7785lcr.c +++ b/arch/sh/drivers/pci/ops-sh7785lcr.c @@ -41,7 +41,7 @@ static struct resource sh7785_mem_resource = { }; struct pci_channel board_pci_channels[] = { - { &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff }, + { sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff }, { NULL, NULL, NULL, 0, 0 }, }; @@ -61,5 +61,5 @@ static struct sh4_pci_address_map sh7785_pci_map = { int __init pcibios_init_platform(void) { - return sh7780_pcic_init(&sh7785_pci_map); + return sh7780_pcic_init(&board_pci_channels[0], &sh7785_pci_map); } -- cgit v0.10.2 From f1a9ba8f15f89f3f444985251efaf809cd16da53 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 11 Mar 2009 16:17:53 +0900 Subject: sh: pci: drop duplicate PCIC fixups for SE7780 and SH7785LCR. SE7780 has the same PCIC fixup as SDK7780, and SH7785LCR the same as R7780RP. Switch to using those, and drop the duplicate code. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index 847e9089..67d710a 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -21,6 +21,6 @@ obj-$(CONFIG_SH_SDK7780) += ops-sdk7780.o fixups-sdk7780.o obj-$(CONFIG_SH_TITAN) += ops-titan.o obj-$(CONFIG_SH_LANDISK) += ops-landisk.o obj-$(CONFIG_SH_LBOX_RE2) += ops-lboxre2.o fixups-lboxre2.o -obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += ops-se7780.o fixups-se7780.o +obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += ops-se7780.o fixups-sdk7780.o obj-$(CONFIG_SH_CAYMAN) += ops-cayman.o -obj-$(CONFIG_SH_SH7785LCR) += ops-sh7785lcr.o fixups-sh7785lcr.o +obj-$(CONFIG_SH_SH7785LCR) += ops-sh7785lcr.o fixups-r7780rp.o diff --git a/arch/sh/drivers/pci/fixups-se7780.c b/arch/sh/drivers/pci/fixups-se7780.c deleted file mode 100644 index a968af7..0000000 --- a/arch/sh/drivers/pci/fixups-se7780.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * arch/sh/drivers/pci/fixups-se7780.c - * - * HITACHI UL Solution Engine 7780 PCI fixups - * - * Copyright (C) 2003 Lineo uSolutions, Inc. - * Copyright (C) 2004 - 2006 Paul Mundt - * Copyright (C) 2006 Nobuhiro Iwamatsu - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include "pci-sh4.h" -#include - -int pci_fixup_pcic(struct pci_channel *chan) -{ - ctrl_outl(0x00000001, SH7780_PCI_VCR2); - - /* Enable all interrupts, so we know what to fix */ - pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR); - pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM); - - /* Set up standard PCI config registers */ - ctrl_outw(0xFB00, PCI_REG(SH7780_PCISTATUS)); - ctrl_outw(0x0047, PCI_REG(SH7780_PCICMD)); - ctrl_outb( 0x00, PCI_REG(SH7780_PCIPIF)); - ctrl_outb( 0x00, PCI_REG(SH7780_PCISUB)); - ctrl_outb( 0x06, PCI_REG(SH7780_PCIBCC)); - ctrl_outw(0x1912, PCI_REG(SH7780_PCISVID)); - ctrl_outw(0x0001, PCI_REG(SH7780_PCISID)); - - pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0); /* PCI */ - pci_write_reg(chan, 0x08000000, SH7780_PCILAR0); /* SHwy */ - pci_write_reg(chan, 0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */ - - pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1); - pci_write_reg(chan, 0x00000000, SH7780_PCILAR1); - pci_write_reg(chan, 0x00000000, SH7780_PCILSR1); - - pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR); - - /* - * Set the MBR so PCI address is one-to-one with window, - * meaning all calls go straight through... use ifdef to - * catch erroneous assumption. - */ - pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0); - pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ - - /* Set IOBR for window containing area specified in pci.h */ - pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1), - SH7780_PCIIOBR); - pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18), - SH7780_PCIIOBMR); - - pci_write_reg(chan, 0xA5000C01, SH7780_PCICR); - - return 0; -} diff --git a/arch/sh/drivers/pci/fixups-sh7785lcr.c b/arch/sh/drivers/pci/fixups-sh7785lcr.c deleted file mode 100644 index 9e7dc79..0000000 --- a/arch/sh/drivers/pci/fixups-sh7785lcr.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * arch/sh/drivers/pci/fixups-sh7785lcr.c - * - * R0P7785LC0011RL PCI fixups - * Copyright (C) 2008 Yoshihiro Shimoda - * - * Based on arch/sh/drivers/pci/fixups-r7780rp.c - * Copyright (C) 2003 Lineo uSolutions, Inc. - * Copyright (C) 2004 - 2006 Paul Mundt - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include "pci-sh4.h" - -int pci_fixup_pcic(struct pci_channel *chan) -{ - pci_write_reg(chan, 0x000043ff, SH4_PCIINTM); - pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); - - pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD); - pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR); - - pci_write_reg(chan, 0x00011912, SH7780_PCISVID); - pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0); - pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0); - pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1); - pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1); - - pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0); - pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0); - -#ifdef CONFIG_32BIT - pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2); - pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2); -#endif - - /* Set IOBR for windows containing area specified in pci.h */ - pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE - 1), - SH7780_PCIIOBR); - pci_write_reg(chan, ((SH7780_PCI_IO_SIZE - 1) & (7 << 18)), - SH7780_PCIIOBMR); - - return 0; -} -- cgit v0.10.2 From d0deef5b14af7d5bbd0003a0a2a1a32326e20a6d Mon Sep 17 00:00:00 2001 From: Shawn Du Date: Tue, 14 Apr 2009 13:58:56 +0800 Subject: blktrace: support per-partition tracing Though one can specify '-d /dev/sda1' when using blktrace, it still traces the whole sda. To support per-partition tracing, when we start tracing, we initialize bt->start_lba and bt->end_lba to the start and end sector of that partition. Note some actions are per device, thus we don't filter 0-sector events. The original patch and discussion can be found here: http://marc.info/?l=linux-btrace&m=122949374214540&w=2 Signed-off-by: Shawn Du Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Jens Axboe LKML-Reference: <49E42620.4050701@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615d..f8c218c 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) memcpy(&buts.name, &cbuts.name, 32); mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); + ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts); mutex_unlock(&bdev->bd_mutex); if (ret) return ret; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 82312df..49c9873 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp, return blk_trace_setup(sdp->device->request_queue, sdp->disk->disk_name, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), + NULL, (char *)arg); case BLKTRACESTART: return blk_trace_startstop(sdp->device->request_queue, 1); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d960889..267edc4 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -165,8 +165,9 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern int do_blk_trace_setup(struct request_queue *q, - char *name, dev_t dev, struct blk_user_trace_setup *buts); +extern int do_blk_trace_setup(struct request_queue *q, char *name, + dev_t dev, struct block_device *bdev, + struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); /** @@ -193,6 +194,7 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); extern void blk_add_driver_data(struct request_queue *q, struct request *rq, void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); @@ -200,15 +202,15 @@ extern int blk_trace_remove(struct request_queue *q); extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ -#define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) -#define blk_trace_shutdown(q) do { } while (0) -#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) -#define blk_trace_setup(q, name, dev, arg) (-ENOTTY) -#define blk_trace_startstop(q, start) (-ENOTTY) -#define blk_trace_remove(q) (-ENOTTY) -#define blk_add_trace_msg(q, fmt, ...) do { } while (0) - +# define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) +# define blk_trace_shutdown(q) do { } while (0) +# define do_blk_trace_setup(q, name, dev, bdev, buts) (-ENOTTY) +# define blk_add_driver_data(q, rq, data, len) do {} while (0) +# define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) +# define blk_trace_startstop(q, start) (-ENOTTY) +# define blk_trace_remove(q) (-ENOTTY) +# define blk_add_trace_msg(q, fmt, ...) do { } while (0) #endif /* CONFIG_BLK_DEV_IO_TRACE */ + #endif /* __KERNEL__ */ #endif diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 2b98195..e932654 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -147,7 +147,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, { if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) return 1; - if (sector < bt->start_lba || sector > bt->end_lba) + if (sector && (sector < bt->start_lba || sector > bt->end_lba)) return 1; if (bt->pid && pid != bt->pid) return 1; @@ -192,7 +192,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, DISCARD); pid = tsk->pid; - if (unlikely(act_log_check(bt, what, sector, pid))) + if (act_log_check(bt, what, sector, pid)) return; cpu = raw_smp_processor_id(); @@ -407,11 +407,13 @@ static struct rchan_callbacks blk_relay_callbacks = { * Setup everything required to start tracing */ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, - struct blk_user_trace_setup *buts) + struct block_device *bdev, + struct blk_user_trace_setup *buts) { struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; int ret, i; + struct hd_struct *part = NULL; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -480,11 +482,21 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->act_mask) bt->act_mask = (u16) -1; - bt->start_lba = buts->start_lba; - bt->end_lba = buts->end_lba; - if (!bt->end_lba) + if (bdev) + part = bdev->bd_part; + + if (part) { + bt->start_lba = part->start_sect; + bt->end_lba = part->start_sect + part->nr_sects; + } else bt->end_lba = -1ULL; + /* overwrite with user settings */ + if (buts->start_lba) + bt->start_lba = buts->start_lba; + if (buts->end_lba) + bt->end_lba = buts->end_lba; + bt->pid = buts->pid; bt->trace_state = Blktrace_setup; @@ -505,6 +517,7 @@ err: } int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, + struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup buts; @@ -514,7 +527,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (ret) return -EFAULT; - ret = do_blk_trace_setup(q, name, dev, &buts); + ret = do_blk_trace_setup(q, name, dev, bdev, &buts); if (ret) return ret; @@ -582,7 +595,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: bdevname(bdev, b); - ret = blk_trace_setup(q, b, bdev->bd_dev, arg); + ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; case BLKTRACESTART: start = 1; -- cgit v0.10.2 From 9908c30997b8a73c95f836170b9998dae9aa3f4a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Apr 2009 13:59:34 +0800 Subject: blktrace: support per-partition tracing for ftrace plugin The previous patch adds support to trace a single partition for relay+ioctl blktrace, and this patch is for ftrace plugin blktrace: # echo 1 > /sys/block/sda/sda7/enable # cat start_lba 102398373 # cat end_lba 102703545 Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Shawn Du Cc: Jens Axboe LKML-Reference: <49E42646.4060608@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e932654..d109898 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -403,6 +403,23 @@ static struct rchan_callbacks blk_relay_callbacks = { .remove_buf_file = blk_remove_buf_file_callback, }; +static void blk_trace_setup_lba(struct blk_trace *bt, + struct block_device *bdev) +{ + struct hd_struct *part = NULL; + + if (bdev) + part = bdev->bd_part; + + if (part) { + bt->start_lba = part->start_sect; + bt->end_lba = part->start_sect + part->nr_sects; + } else { + bt->start_lba = 0; + bt->end_lba = -1ULL; + } +} + /* * Setup everything required to start tracing */ @@ -413,7 +430,6 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_trace *old_bt, *bt = NULL; struct dentry *dir = NULL; int ret, i; - struct hd_struct *part = NULL; if (!buts->buf_size || !buts->buf_nr) return -EINVAL; @@ -482,14 +498,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!bt->act_mask) bt->act_mask = (u16) -1; - if (bdev) - part = bdev->bd_part; - - if (part) { - bt->start_lba = part->start_sect; - bt->end_lba = part->start_sect + part->nr_sects; - } else - bt->end_lba = -1ULL; + blk_trace_setup_lba(bt, bdev); /* overwrite with user settings */ if (buts->start_lba) @@ -1370,7 +1379,8 @@ static int blk_trace_remove_queue(struct request_queue *q) /* * Setup everything required to start tracing */ -static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) +static int blk_trace_setup_queue(struct request_queue *q, + struct block_device *bdev) { struct blk_trace *old_bt, *bt = NULL; int ret = -ENOMEM; @@ -1383,9 +1393,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) if (!bt->msg_data) goto free_bt; - bt->dev = dev; + bt->dev = bdev->bd_dev; bt->act_mask = (u16)-1; - bt->end_lba = -1ULL; + + blk_trace_setup_lba(bt, bdev); old_bt = xchg(&q->blk_trace, bt); if (old_bt != NULL) { @@ -1602,7 +1613,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (attr == &dev_attr_enable) { if (value) - ret = blk_trace_setup_queue(q, bdev->bd_dev); + ret = blk_trace_setup_queue(q, bdev); else ret = blk_trace_remove_queue(q); goto out_unlock_bdev; @@ -1610,7 +1621,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, ret = 0; if (q->blk_trace == NULL) - ret = blk_trace_setup_queue(q, bdev->bd_dev); + ret = blk_trace_setup_queue(q, bdev); if (ret == 0) { if (attr == &dev_attr_act_mask) -- cgit v0.10.2 From 1d54ad6da9192fed5dd3b60224d9f2dfea0dcd82 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 14 Apr 2009 14:00:05 +0800 Subject: blktrace: add trace/ to /sys/block/sda Impact: allow ftrace-plugin blktrace to trace device-mapper devices To trace a single partition: # echo 1 > /sys/block/sda/sda1/enable To trace the whole sda instead: # echo 1 > /sys/block/sda/enable Thus we also fix an issue reported by Ted, that ftrace-plugin blktrace can't be used to trace device-mapper devices. Now: # echo 1 > /sys/block/dm-0/trace/enable echo: write error: No such device or address # mount -t ext4 /dev/dm-0 /mnt # echo 1 > /sys/block/dm-0/trace/enable # echo blk > /debug/tracing/current_tracer Reported-by: Theodore Tso Signed-off-by: Li Zefan Acked-by: "Theodore Ts'o" Cc: Arnaldo Carvalho de Melo Cc: Shawn Du Cc: Jens Axboe LKML-Reference: <49E42665.6020506@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 73f36be..8653d71 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -387,16 +387,21 @@ struct kobj_type blk_queue_ktype = { int blk_register_queue(struct gendisk *disk) { int ret; + struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; if (WARN_ON(!q)) return -ENXIO; + ret = blk_trace_init_sysfs(dev); + if (ret) + return ret; + if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) return ret; diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 267edc4..62763c9 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -198,6 +198,7 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); +extern int blk_trace_init_sysfs(struct device *dev); extern struct attribute_group blk_trace_attr_group; @@ -210,6 +211,11 @@ extern struct attribute_group blk_trace_attr_group; # define blk_trace_startstop(q, start) (-ENOTTY) # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) +static inline int blk_trace_init_sysfs(struct device *dev) +{ + return 0; +} + #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* __KERNEL__ */ diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index d109898..8e7c5da 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1644,3 +1644,8 @@ out: return ret ? ret : count; } +int blk_trace_init_sysfs(struct device *dev) +{ + return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); +} + -- cgit v0.10.2 From f3948f8857ef5de239f28a61dddb1554a0ae4c2c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 15 Apr 2009 11:02:56 +0800 Subject: blktrace: fix context-info when mixed-using blk tracer and trace events When current tracer is set to blk tracer, TRACE_ITER_CONTEXT_INFO is unset, but actually context-info is printed: pdflush-431 [000] 821.181576: 8,0 P N [pdflush] And then if we enable TRACE_ITER_CONTEXT_INFO: # echo context-info > trace_options We'll see context-info printed twice. What's worse, when we use blk tracer and trace events at the same time, we'll see no context-info for trace events at all: jbd2_commit_logging: dev dm-0:8 transaction 333227 jbd2_end_commit: dev dm-0:8 transaction 333227 head 332814 rm-25433 [001] 9578.307485: 8,18 m N cfq25433 slice expired t=0 rm-25433 [001] 9578.307486: 8,18 m N cfq25433 put_queue This patch adds blk_tracer->set_flags(), and context-info flag is unset only when we set the output to classic mode. Note after this patch, one should unset context-info explicitly if he wants to get binary output that can be parsed by blkparse: # echo nocontext-info > trace_options # echo bin > trace_options # echo blk > current_tracer # cat trace_pipe | blkparse -i - Reported-by: Theodore Ts'o Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <49E54E60.50408@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 8e7c5da..c32062b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1211,7 +1211,6 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { blk_tracer_enabled = true; - trace_flags &= ~TRACE_ITER_CONTEXT_INFO; } static int blk_tracer_init(struct trace_array *tr) @@ -1224,7 +1223,6 @@ static int blk_tracer_init(struct trace_array *tr) static void blk_tracer_stop(struct trace_array *tr) { blk_tracer_enabled = false; - trace_flags |= TRACE_ITER_CONTEXT_INFO; } static void blk_tracer_reset(struct trace_array *tr) @@ -1289,9 +1287,6 @@ out: static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, int flags) { - if (!trace_print_context(iter)) - return TRACE_TYPE_PARTIAL_LINE; - return print_one_line(iter, false); } @@ -1326,6 +1321,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) return print_one_line(iter, true); } +static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set) +{ + /* don't output context-info for blk_classic output */ + if (bit == TRACE_BLK_OPT_CLASSIC) { + if (set) + trace_flags &= ~TRACE_ITER_CONTEXT_INFO; + else + trace_flags |= TRACE_ITER_CONTEXT_INFO; + } + return 0; +} + static struct tracer blk_tracer __read_mostly = { .name = "blk", .init = blk_tracer_init, @@ -1335,6 +1342,7 @@ static struct tracer blk_tracer __read_mostly = { .print_header = blk_tracer_print_header, .print_line = blk_tracer_print_line, .flags = &blk_tracer_flags, + .set_flag = blk_tracer_set_flag, }; static struct trace_event trace_blk_event = { -- cgit v0.10.2 From 18cb7109d3e83195b605ff2905981020e86f72ca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 16 Apr 2009 10:22:24 +0200 Subject: ALSA: hda - Check strcpy length Check the length to copy via strlen() beforehand to avoid the stack corruption, or use strlcpy() to be safe in HD-audio codes. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 37f24ce..48f0cea 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1431,6 +1431,8 @@ _snd_hda_find_mixer_ctl(struct hda_codec *codec, memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; id.index = idx; + if (snd_BUG_ON(strlen(name) >= sizeof(id.name))) + return NULL; strcpy(id.name, name); return snd_ctl_find_id(codec->bus->card, &id); } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 21a3092..41db5d4 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1830,7 +1830,7 @@ azx_attach_pcm_stream(struct hda_bus *bus, struct hda_codec *codec, &pcm); if (err < 0) return err; - strcpy(pcm->name, cpcm->name); + strlcpy(pcm->name, cpcm->name, sizeof(pcm->name)); apcm = kzalloc(sizeof(*apcm), GFP_KERNEL); if (apcm == NULL) return -ENOMEM; @@ -2358,9 +2358,11 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, } strcpy(card->driver, "HDA-Intel"); - strcpy(card->shortname, driver_short_names[chip->driver_type]); - sprintf(card->longname, "%s at 0x%lx irq %i", - card->shortname, chip->addr, chip->irq); + strlcpy(card->shortname, driver_short_names[chip->driver_type], + sizeof(card->shortname)); + snprintf(card->longname, sizeof(card->longname), + "%s at 0x%lx irq %i", + card->shortname, chip->addr, chip->irq); *rchip = chip; return 0; -- cgit v0.10.2 From 0232ba9ce031d0fd8f331fa8b3c00e16901f54e6 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 16 Apr 2009 18:01:31 +0900 Subject: sh: pci: Kill off unused SH4_PCIC_NO_RESET code. Nothing ended up using this anymore, so just kill it off. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c index c46911d9..178b778 100644 --- a/arch/sh/drivers/pci/ops-landisk.c +++ b/arch/sh/drivers/pci/ops-landisk.c @@ -39,8 +39,6 @@ static struct sh4_pci_address_map sh7751_pci_map = { .base = SH7751_CS3_BASE_ADDR, .size = (64 << 20), /* 64MB */ }, - - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c index 781496b..91cabd8 100644 --- a/arch/sh/drivers/pci/ops-lboxre2.c +++ b/arch/sh/drivers/pci/ops-lboxre2.c @@ -48,11 +48,6 @@ static struct sh4_pci_address_map sh7751_pci_map = { .base = SH7751_CS3_BASE_ADDR, .size = 0x04000000, }, - .window1 = { - .base = 0x00000000, /* Unused */ - .size = 0x00000000, /* Unused */ - }, - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c index c58f1cf..8ec6d22 100644 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ b/arch/sh/drivers/pci/ops-r7780rp.c @@ -57,8 +57,6 @@ static struct sh4_pci_address_map sh7780_pci_map = { .base = SH7780_CS3_BASE_ADDR, .size = 0x04000000, }, - - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c index d374cd3..96b916c 100644 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ b/arch/sh/drivers/pci/ops-rts7751r2d.c @@ -56,13 +56,6 @@ static struct sh4_pci_address_map sh7751_pci_map = { .base = SH7751_CS3_BASE_ADDR, .size = 0x04000000, }, - - .window1 = { - .base = 0x00000000, /* Unused */ - .size = 0x00000000, /* Unused */ - }, - - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c index b34fbc5..6a0b7c0 100644 --- a/arch/sh/drivers/pci/ops-sdk7780.c +++ b/arch/sh/drivers/pci/ops-sdk7780.c @@ -62,7 +62,6 @@ static struct sh4_pci_address_map sdk7780_pci_map = { .base = SH7780_CS3_BASE_ADDR, .size = 0x04000000, }, - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c index 4730207..583b8e8 100644 --- a/arch/sh/drivers/pci/ops-se7780.c +++ b/arch/sh/drivers/pci/ops-se7780.c @@ -67,7 +67,6 @@ static struct sh4_pci_address_map se7780_pci_map = { .base = SH7780_CS2_BASE_ADDR, .size = 0x04000000, }, - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c index afbb9bd..ab0d1de 100644 --- a/arch/sh/drivers/pci/ops-sh7785lcr.c +++ b/arch/sh/drivers/pci/ops-sh7785lcr.c @@ -55,8 +55,6 @@ static struct sh4_pci_address_map sh7785_pci_map = { .size = 0x20000000, #endif }, - - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c index 2e254c6..dd2c5df 100644 --- a/arch/sh/drivers/pci/ops-snapgear.c +++ b/arch/sh/drivers/pci/ops-snapgear.c @@ -54,8 +54,6 @@ static struct sh4_pci_address_map sh7751_pci_map = { .base = SH7751_CS2_BASE_ADDR, .size = SNAPGEAR_LSR1_SIZE, }, - - .flags = SH4_PCIC_NO_RESET, }; /* diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c index 31ed037..e45bb62 100644 --- a/arch/sh/drivers/pci/ops-titan.c +++ b/arch/sh/drivers/pci/ops-titan.c @@ -66,8 +66,6 @@ static struct sh4_pci_address_map sh7751_pci_map = { .base = SH7751_CS2_BASE_ADDR, .size = SH7751_MEM_REGION_SIZE*2, }, - - .flags = SH4_PCIC_NO_RESET, }; int __init pcibios_init_platform(void) diff --git a/arch/sh/drivers/pci/pci-sh4.h b/arch/sh/drivers/pci/pci-sh4.h index 90abfe3..3d5296c 100644 --- a/arch/sh/drivers/pci/pci-sh4.h +++ b/arch/sh/drivers/pci/pci-sh4.h @@ -149,9 +149,6 @@ #define SH4_PCIPDTR_PB0 0x000000001 /* Port 0 Enable */ #define SH4_PCIPDR 0x220 /* Port IO Data Register */ -/* Flags */ -#define SH4_PCIC_NO_RESET 0x0001 - /* arch/sh/kernel/drivers/pci/ops-sh4.c */ extern struct pci_ops sh4_pci_ops; int sh4_pci_check_direct(struct pci_channel *chan); @@ -165,7 +162,6 @@ struct sh4_pci_address_space { struct sh4_pci_address_map { struct sh4_pci_address_space window0; struct sh4_pci_address_space window1; - unsigned long flags; }; static inline void pci_write_reg(struct pci_channel *chan, diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 2a6c7aa..af88744 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -99,21 +99,6 @@ int __init sh7751_pcic_init(struct pci_channel *chan, word = SH4_PCIPINT_D3 | SH4_PCIPINT_D0; pci_write_reg(chan, word, SH4_PCIPINT); - /* - * This code is unused for some boards as it is done in the - * bootloader and doing it here means the MAC addresses loaded - * by the bootloader get lost. - */ - if (!(map->flags & SH4_PCIC_NO_RESET)) { - /* toggle PCI reset pin */ - word = SH4_PCICR_PREFIX | SH4_PCICR_PRST; - pci_write_reg(chan, word, SH4_PCICR); - /* Wait for a long time... not 1 sec. but long enough */ - mdelay(100); - word = SH4_PCICR_PREFIX; - pci_write_reg(chan, word, SH4_PCICR); - } - /* set the command/status bits to: * Wait Cycle Control + Parity Enable + Bus Master + * Mem space enable diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 87a7f3b..282cabe 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -96,21 +96,6 @@ int __init sh7780_pcic_init(struct pci_channel *chan, { u32 word; - /* - * This code is unused for some boards as it is done in the - * bootloader and doing it here means the MAC addresses loaded - * by the bootloader get lost. - */ - if (!(map->flags & SH4_PCIC_NO_RESET)) { - /* toggle PCI reset pin */ - word = SH4_PCICR_PREFIX | SH4_PCICR_PRST; - pci_write_reg(chan, word, SH4_PCICR); - /* Wait for a long time... not 1 sec. but long enough */ - mdelay(100); - word = SH4_PCICR_PREFIX; - pci_write_reg(chan, word, SH4_PCICR); - } - /* set the command/status bits to: * Wait Cycle Control + Parity Enable + Bus Master + * Mem space enable -- cgit v0.10.2 From 3f1a4d826751d9759fc95da4e47d08d2745e0055 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 15 Apr 2009 21:35:26 +0100 Subject: ASoC: Check we have DAI ops when calling via accessor functions Also make sure we're checking for the right operation while we're here. Signed-off-by: Mark Brown diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index dd28009..9250392 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2100,7 +2100,7 @@ EXPORT_SYMBOL_GPL(snd_soc_put_volsw_s8); int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, unsigned int freq, int dir) { - if (dai->ops->set_sysclk) + if (dai->ops && dai->ops->set_sysclk) return dai->ops->set_sysclk(dai, clk_id, freq, dir); else return -EINVAL; @@ -2120,7 +2120,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_sysclk); int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai, int div_id, int div) { - if (dai->ops->set_clkdiv) + if (dai->ops && dai->ops->set_clkdiv) return dai->ops->set_clkdiv(dai, div_id, div); else return -EINVAL; @@ -2139,7 +2139,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_clkdiv); int snd_soc_dai_set_pll(struct snd_soc_dai *dai, int pll_id, unsigned int freq_in, unsigned int freq_out) { - if (dai->ops->set_pll) + if (dai->ops && dai->ops->set_pll) return dai->ops->set_pll(dai, pll_id, freq_in, freq_out); else return -EINVAL; @@ -2155,7 +2155,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_pll); */ int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { - if (dai->ops->set_fmt) + if (dai->ops && dai->ops->set_fmt) return dai->ops->set_fmt(dai, fmt); else return -EINVAL; @@ -2174,7 +2174,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_fmt); int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int mask, int slots) { - if (dai->ops->set_sysclk) + if (dai->ops && dai->ops->set_tdm_slot) return dai->ops->set_tdm_slot(dai, mask, slots); else return -EINVAL; @@ -2190,7 +2190,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tdm_slot); */ int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate) { - if (dai->ops->set_sysclk) + if (dai->ops && dai->ops->set_tristate) return dai->ops->set_tristate(dai, tristate); else return -EINVAL; @@ -2206,7 +2206,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tristate); */ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute) { - if (dai->ops->digital_mute) + if (dai->ops && dai->ops->digital_mute) return dai->ops->digital_mute(dai, mute); else return -EINVAL; -- cgit v0.10.2 From fd5dfad9cf51bc3575b5e50193403de4a3de23bc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 15 Apr 2009 21:37:46 +0100 Subject: ASoC: Volume controls are never of boolean type Some limited volume controls (mostly simple attenuations) have only two settings so the ASoC info functions misreport them as booleans. Since we currently have no better information check for " Volume" in the control name and always report any controls matching as being integer. Signed-off-by: Mark Brown diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 9250392..af11791 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1779,7 +1779,7 @@ int snd_soc_info_volsw_ext(struct snd_kcontrol *kcontrol, { int max = kcontrol->private_value; - if (max == 1) + if (max == 1 && !strstr(kcontrol->id.name, " Volume")) uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; else uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; @@ -1809,7 +1809,7 @@ int snd_soc_info_volsw(struct snd_kcontrol *kcontrol, unsigned int shift = mc->shift; unsigned int rshift = mc->rshift; - if (max == 1) + if (max == 1 && !strstr(kcontrol->id.name, " Volume")) uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; else uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; @@ -1916,7 +1916,7 @@ int snd_soc_info_volsw_2r(struct snd_kcontrol *kcontrol, (struct soc_mixer_control *)kcontrol->private_value; int max = mc->max; - if (max == 1) + if (max == 1 && !strstr(kcontrol->id.name, " Volume")) uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN; else uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; -- cgit v0.10.2 From 0d960e8891459f5af85e5781bce3f1da5f7db0fb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 16 Apr 2009 10:08:39 +0100 Subject: ASoC: Request shared rates for WM8903 It has a shared LRCLK. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 8cf571f..c539184 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1523,6 +1523,7 @@ struct snd_soc_dai wm8903_dai = { .formats = WM8903_FORMATS, }, .ops = &wm8903_dai_ops, + .symmetric_rates = 1, }; EXPORT_SYMBOL_GPL(wm8903_dai); -- cgit v0.10.2 From c29b206ffd0700acb2dc1fdb70856cc4b907216c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 15 Apr 2009 15:38:55 +0300 Subject: ASoC: OMAP: Use single-phase for DSP mode Use single-phase mode for the DSP mode and keep the dual phase mode for the I2S mode. The mono (1 channel) mode already used single phase mode, now it is more cleaner. There is no need to configure the second phase, when the single phase is used. Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 9c09b94..402a1eb 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -214,8 +214,9 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, struct omap_mcbsp_data *mcbsp_data = to_mcbsp(cpu_dai->private_data); struct omap_mcbsp_reg_cfg *regs = &mcbsp_data->regs; int dma, bus_id = mcbsp_data->bus_id, id = cpu_dai->id; - int wlen, channels; + int wlen, channels, wpf; unsigned long port; + unsigned int format; if (cpu_class_is_omap1()) { dma = omap1_dma_reqs[bus_id][substream->stream]; @@ -243,18 +244,23 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, return 0; } - channels = params_channels(params); + format = mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK; + wpf = channels = params_channels(params); switch (channels) { case 2: - /* Use dual-phase frames */ - regs->rcr2 |= RPHASE; - regs->xcr2 |= XPHASE; + if (format == SND_SOC_DAIFMT_I2S) { + /* Use dual-phase frames */ + regs->rcr2 |= RPHASE; + regs->xcr2 |= XPHASE; + /* Set 1 word per (McBSP) frame for phase1 and phase2 */ + wpf--; + regs->rcr2 |= RFRLEN2(wpf - 1); + regs->xcr2 |= XFRLEN2(wpf - 1); + } case 1: - /* Set 1 word per (McBSP) frame */ - regs->rcr2 |= RFRLEN2(1 - 1); - regs->rcr1 |= RFRLEN1(1 - 1); - regs->xcr2 |= XFRLEN2(1 - 1); - regs->xcr1 |= XFRLEN1(1 - 1); + /* Set word per (McBSP) frame for phase1 */ + regs->rcr1 |= RFRLEN1(wpf - 1); + regs->xcr1 |= XFRLEN1(wpf - 1); break; default: /* Unsupported number of channels */ @@ -276,9 +282,9 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, } /* Set FS period and length in terms of bit clock periods */ - switch (mcbsp_data->fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + switch (format) { case SND_SOC_DAIFMT_I2S: - regs->srgr2 |= FPER(wlen * 2 - 1); + regs->srgr2 |= FPER(wlen * channels - 1); regs->srgr1 |= FWID(wlen - 1); break; case SND_SOC_DAIFMT_DSP_B: -- cgit v0.10.2 From 3ba191ce051a32b20757f063120496e860ea8f9d Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 15 Apr 2009 15:38:56 +0300 Subject: ASoC: OMAP: Add DSP_A mode support for mcbsp DSP_A mode is similar to the DSP_B, but the MSB is delayed with one bclk (appears after the FS pulse and not under it). Signed-off-by: Peter Ujfalusi Acked-by: Jarkko Nikula Signed-off-by: Mark Brown diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 402a1eb..2b4a8da 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -287,6 +287,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, regs->srgr2 |= FPER(wlen * channels - 1); regs->srgr1 |= FWID(wlen - 1); break; + case SND_SOC_DAIFMT_DSP_A: case SND_SOC_DAIFMT_DSP_B: regs->srgr2 |= FPER(wlen * channels - 1); regs->srgr1 |= FWID(wlen * channels - 2); @@ -330,6 +331,13 @@ static int omap_mcbsp_dai_set_dai_fmt(struct snd_soc_dai *cpu_dai, regs->rcr2 |= RDATDLY(1); regs->xcr2 |= XDATDLY(1); break; + case SND_SOC_DAIFMT_DSP_A: + /* 1-bit data delay */ + regs->rcr2 |= RDATDLY(1); + regs->xcr2 |= XDATDLY(1); + /* Invert FS polarity configuration */ + temp_fmt ^= SND_SOC_DAIFMT_NB_IF; + break; case SND_SOC_DAIFMT_DSP_B: /* 0-bit data delay */ regs->rcr2 |= RDATDLY(0); -- cgit v0.10.2 From d1f0ae5e2e45e74cff4c3bdefb0fc77608cdfeec Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 15 Apr 2009 21:34:55 +0200 Subject: x86: standardize Kbuild rules Introducing this Kbuild file allow us to: make arch/x86/ And thus building all the core part of x86. Signed-off-by: Sam Ravnborg Cc: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild new file mode 100644 index 0000000..ad8ec35 --- /dev/null +++ b/arch/x86/Kbuild @@ -0,0 +1,16 @@ + +obj-$(CONFIG_KVM) += kvm/ + +# Xen paravirtualization support +obj-$(CONFIG_XEN) += xen/ + +# lguest paravirtualization support +obj-$(CONFIG_LGUEST_GUEST) += lguest/ + +obj-y += kernel/ +obj-y += mm/ + +obj-y += crypto/ +obj-y += vdso/ +obj-$(CONFIG_IA32_EMULATION) += ia32/ + diff --git a/arch/x86/Makefile b/arch/x86/Makefile index f05d8c9..e81f0b2 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -7,8 +7,6 @@ else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif -core-$(CONFIG_KVM) += arch/x86/kvm/ - # BITS is used as extension for files which are available in a 32 bit # and a 64 bit version to simplify shared Makefiles. # e.g.: obj-y += foo_$(BITS).o @@ -118,21 +116,8 @@ head-y += arch/x86/kernel/init_task.o libs-y += arch/x86/lib/ -# Sub architecture files that needs linking first -core-y += $(fcore-y) - -# Xen paravirtualization support -core-$(CONFIG_XEN) += arch/x86/xen/ - -# lguest paravirtualization support -core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/ - -core-y += arch/x86/kernel/ -core-y += arch/x86/mm/ - -core-y += arch/x86/crypto/ -core-y += arch/x86/vdso/ -core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/ +# See arch/x86/Kbuild for content of core part of the kernel +core-y += arch/x86/ # drivers-y are linked after core-y drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ -- cgit v0.10.2 From 84971bb401866d79c6b353cb1d8861c2b4621867 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 12:44:25 +0900 Subject: sh: pci: Kill off useless debugging printk() in pci-sh7780 init. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 282cabe..b826c7b 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -118,18 +118,6 @@ int __init sh7780_pcic_init(struct pci_channel *chan, pci_write_reg(chan, map->window1.base, SH4_PCILAR1); pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1); - /* Map IO space into PCI IO window: - * IO addresses will be translated to the PCI IO window base address - */ - pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n", - chan->io_resource->start, chan->io_resource->end, - chan->io_base + chan->io_resource->start); - - /* NOTE: I'm ignoring the PCI error IRQs for now.. - * TODO: add support for the internal error interrupts and - * DMA interrupts... - */ - /* Apply any last-minute PCIC fixups */ pci_fixup_pcic(chan); -- cgit v0.10.2 From b627b4ed3d056c5d00e8f3cb32d033b0ee6619a9 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 13:00:18 +0900 Subject: sh: pci: Move se7780 INTC fixups out of pci-sh7780.c. These fixups belong in the board INTC setup code, not in the middle of pci-sh7780.c. Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-se/7780/irq.c b/arch/sh/boards/mach-se/7780/irq.c index 66ad292..44b61a5 100644 --- a/arch/sh/boards/mach-se/7780/irq.c +++ b/arch/sh/boards/mach-se/7780/irq.c @@ -12,10 +12,13 @@ #include #include #include -#include -#include +#include +#include #include +#define INTC_BASE 0xffd00000 +#define INTC_ICR1 (INTC_BASE+0x1c) + /* * Initialize IRQ setting */ @@ -43,4 +46,7 @@ void __init init_se7780_IRQ(void) ctrl_outw((IRQPIN_PCCPW << IRQPOS_PCCPW), FPGA_INTSEL3); plat_irq_setup_pins(IRQ_MODE_IRQ); /* install handlers for IRQ0-7 */ + + /* ICR1: detect low level(for 2ndcut) */ + ctrl_outl(0xAAAA0000, INTC_ICR1); } diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index b826c7b..45fa423 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -22,20 +22,6 @@ #include #include "pci-sh4.h" -#define INTC_BASE 0xffd00000 -#define INTC_ICR0 (INTC_BASE+0x0) -#define INTC_ICR1 (INTC_BASE+0x1c) -#define INTC_INTPRI (INTC_BASE+0x10) -#define INTC_INTREQ (INTC_BASE+0x24) -#define INTC_INTMSK0 (INTC_BASE+0x44) -#define INTC_INTMSK1 (INTC_BASE+0x48) -#define INTC_INTMSK2 (INTC_BASE+0x40080) -#define INTC_INTMSKCLR0 (INTC_BASE+0x64) -#define INTC_INTMSKCLR1 (INTC_BASE+0x68) -#define INTC_INTMSKCLR2 (INTC_BASE+0x40084) -#define INTC_INT2MSKR (INTC_BASE+0x40038) -#define INTC_INT2MSKCR (INTC_BASE+0x4003c) - /* * Initialization. Try all known PCI access methods. Note that we support * using both PCI BIOS and direct access: in such cases, we use I/O ports @@ -75,16 +61,6 @@ int __init sh7780_pci_init(struct pci_channel *chan) return -ENODEV; } - /* Setup the INTC */ - if (mach_is_7780se()) { - /* ICR0: IRL=use separately */ - ctrl_outl(0x00C00020, INTC_ICR0); - /* ICR1: detect low level(for 2ndcut) */ - ctrl_outl(0xAAAA0000, INTC_ICR1); - /* INTPRI: priority=3(all) */ - ctrl_outl(0x33333333, INTC_INTPRI); - } - if ((ret = sh4_pci_check_direct(chan)) != 0) return ret; -- cgit v0.10.2 From 7e4ba0d77c96d328ba968ddff4a464d4d2fa7abc Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 14:07:57 +0900 Subject: sh: pci: Prefer P1SEG over P1SEGADDR for CONFIG_CMD. P1SEGADDR is obsolete and will be killed off completely in the future, so transition off of it and reference P1SEG explicitly. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c index 540683d..2a7f7b5 100644 --- a/arch/sh/drivers/pci/ops-sh4.c +++ b/arch/sh/drivers/pci/ops-sh4.c @@ -1,22 +1,22 @@ /* * Generic SH-4 / SH-4A PCIC operations (SH7751, SH7780). * - * Copyright (C) 2002 - 2006 Paul Mundt + * Copyright (C) 2002 - 2009 Paul Mundt * * This file is subject to the terms and conditions of the GNU General Public * License v2. See the file "COPYING" in the main directory of this archive * for more details. */ #include +#include #include -#include #include "pci-sh4.h" /* * Direct access to PCI hardware... */ #define CONFIG_CMD(bus, devfn, where) \ - P1SEGADDR((bus->number << 16) | (devfn << 8) | (where & ~3)) + (P1SEG | (bus->number << 16) | (devfn << 8) | (where & ~3)) static DEFINE_SPINLOCK(sh4_pci_lock); -- cgit v0.10.2 From 0bbc9bc3189f24de946777af43c9033c8c4871e4 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 14:09:09 +0900 Subject: sh: pci: Set class/sub-class code correctly for SH7780 PCIC. The SH7780 PCI host controller implements a configuration header that requires a fair bit of hand-holding to initialize properly. By default it appears as a pre-2.0 host controller given the zeroed out class code, so fix this up properly. Some boards that happened to be using the R7780RP version of the PCIC fixups had set this correctly, but this belongs in the standard initialization, and is by no means board specific. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c index c295731..004efd4 100644 --- a/arch/sh/drivers/pci/fixups-sdk7780.c +++ b/arch/sh/drivers/pci/fixups-sdk7780.c @@ -16,8 +16,6 @@ int pci_fixup_pcic(struct pci_channel *chan) { - ctrl_outl(0x00000001, SH7780_PCI_VCR2); - /* Enable all interrupts, so we know what to fix */ pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR); pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM); @@ -26,8 +24,6 @@ int pci_fixup_pcic(struct pci_channel *chan) pci_write_reg(chan, 0xFB00, SH7780_PCISTATUS); pci_write_reg(chan, 0x0047, SH7780_PCICMD); pci_write_reg(chan, 0x00, SH7780_PCIPIF); - pci_write_reg(chan, 0x00, SH7780_PCISUB); - pci_write_reg(chan, 0x06, SH7780_PCIBCC); pci_write_reg(chan, 0x1912, SH7780_PCISVID); pci_write_reg(chan, 0x0001, SH7780_PCISID); diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 45fa423..7f4f590 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -72,16 +72,15 @@ int __init sh7780_pcic_init(struct pci_channel *chan, { u32 word; + pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST >> 8, SH7780_PCIBCC); + pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST & 0xff, SH7780_PCISUB); + /* set the command/status bits to: * Wait Cycle Control + Parity Enable + Bus Master + * Mem space enable */ pci_write_reg(chan, 0x00000046, SH7780_PCICMD); - /* define this host as the host bridge */ - word = PCI_BASE_CLASS_BRIDGE << 24; - pci_write_reg(chan, word, SH7780_PCIRID); - /* Set IO and Mem windows to local address * Make PCI and local address the same for easy 1 to 1 mapping */ -- cgit v0.10.2 From 4e7b7fdb129995640f144b7de114e109c6b46a2a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 15:05:19 +0900 Subject: sh: pci: Rework SH7780 host controller detection. This reworks how the host controller is probed, and makes it a bit more verbose in the event a new type of controller is detected. Additionally, we also log the revision information. This now uses the proper access sizes for the vendor/device registers, rather than relying on a larger access that encapsulated both of them. Not all devices support 32-bit read cycles for these registers. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 7f4f590..63b5151 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -34,33 +34,39 @@ int __init sh7780_pci_init(struct pci_channel *chan) { unsigned int id; - int ret, match = 0; + const char *type = NULL; + int ret; - pr_debug("PCI: Starting intialization.\n"); + printk(KERN_NOTICE "PCI: Starting intialization.\n"); chan->reg_base = 0xfe040000; chan->io_base = 0xfe200000; - ctrl_outl(0x00000001, SH7780_PCI_VCR2); /* Enable PCIC */ - - /* check for SH7780/SH7780R hardware */ - id = pci_read_reg(chan, SH7780_PCIVID); - if ((id & 0xffff) == SH7780_VENDOR_ID) { - switch ((id >> 16) & 0xffff) { - case SH7763_DEVICE_ID: - case SH7780_DEVICE_ID: - case SH7781_DEVICE_ID: - case SH7785_DEVICE_ID: - match = 1; - break; - } - } + /* Enable CPU access to the PCIC registers. */ + __raw_writel(PCIECR_ENBL, PCIECR); - if (unlikely(!match)) { - printk(KERN_ERR "PCI: This is not an SH7780 (%x)\n", id); + id = __raw_readw(chan->reg_base + SH7780_PCIVID); + if (id != SH7780_VENDOR_ID) { + printk(KERN_ERR "PCI: Unknown vendor ID 0x%04x.\n", id); return -ENODEV; } + id = __raw_readw(chan->reg_base + SH7780_PCIDID); + type = (id == SH7763_DEVICE_ID) ? "SH7763" : + (id == SH7780_DEVICE_ID) ? "SH7780" : + (id == SH7781_DEVICE_ID) ? "SH7781" : + (id == SH7785_DEVICE_ID) ? "SH7785" : + NULL; + if (unlikely(!type)) { + printk(KERN_ERR "PCI: Found an unsupported Renesas host " + "controller, device id 0x%04x.\n", id); + return -EINVAL; + } + + printk(KERN_NOTICE "PCI: Found a Renesas %s host " + "controller, revision %d.\n", type, + __raw_readb(chan->reg_base + SH7780_PCIRID)); + if ((ret = sh4_pci_check_direct(chan)) != 0) return ret; diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index fffcf1d..213f1d8 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -20,9 +20,8 @@ #define SH7785_DEVICE_ID 0x0007 /* SH7780 Control Registers */ -#define SH7780_PCI_VCR0 0xFE000000 -#define SH7780_PCI_VCR1 0xFE000004 -#define SH7780_PCI_VCR2 0xFE000008 +#define PCIECR 0xFE000008 +#define PCIECR_ENBL 0x01 /* SH7780 Specific Values */ #define SH7780_PCI_CONFIG_BASE 0xFD000000 /* Config space base addr */ -- cgit v0.10.2 From ab78cbcf6877334fc20868b7df7887349e2e01c8 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 15:08:01 +0900 Subject: sh: pci: Use the proper write size for class/sub-class code. Don't use pci_write_reg() for these, as it defaults to 32-bit. Rather than using the helper, use __raw_writeb() directly. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 63b5151..19bac21 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -78,8 +78,10 @@ int __init sh7780_pcic_init(struct pci_channel *chan, { u32 word; - pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST >> 8, SH7780_PCIBCC); - pci_write_reg(chan, PCI_CLASS_BRIDGE_HOST & 0xff, SH7780_PCISUB); + __raw_writeb(PCI_CLASS_BRIDGE_HOST >> 8, + chan->reg_base + SH7780_PCIBCC); + __raw_writeb(PCI_CLASS_BRIDGE_HOST & 0xff, + chan->reg_base + SH7780_PCISUB); /* set the command/status bits to: * Wait Cycle Control + Parity Enable + Bus Master + -- cgit v0.10.2 From c66c1d79a94a7a302e2dc6c93da40902423eac3e Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 16:38:00 +0900 Subject: sh: pci: Set pci_cache_line_size on SH7780 via the PCICLS register. The SH7780 PCIC contains a read-only cache line size register that we can derive pci_cache_line_size from. So, make sure that the software idea of the cache line size actually matches the host controller's idea. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 19bac21..fa73b0d 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -22,15 +22,6 @@ #include #include "pci-sh4.h" -/* - * Initialization. Try all known PCI access methods. Note that we support - * using both PCI BIOS and direct access: in such cases, we use I/O ports - * to access config space. - * - * Note that the platform specific initialization (BSC registers, and memory - * space mapping) will be called via the platform defined function - * pcibios_init_platform(). - */ int __init sh7780_pci_init(struct pci_channel *chan) { unsigned int id; @@ -70,19 +61,31 @@ int __init sh7780_pci_init(struct pci_channel *chan) if ((ret = sh4_pci_check_direct(chan)) != 0) return ret; + /* + * Platform specific initialization (BSC registers, and memory space + * mapping) will be called via the platform defined function + * pcibios_init_platform(). + */ return pcibios_init_platform(); } +extern u8 pci_cache_line_size; + int __init sh7780_pcic_init(struct pci_channel *chan, struct sh4_pci_address_map *map) { u32 word; + /* + * Set the class and sub-class codes. + */ __raw_writeb(PCI_CLASS_BRIDGE_HOST >> 8, chan->reg_base + SH7780_PCIBCC); __raw_writeb(PCI_CLASS_BRIDGE_HOST & 0xff, chan->reg_base + SH7780_PCISUB); + pci_cache_line_size = pci_read_reg(chan, SH7780_PCICLS) / 4; + /* set the command/status bits to: * Wait Cycle Control + Parity Enable + Bus Master + * Mem space enable -- cgit v0.10.2 From f1dcab756687622b658154ded1657538984edcdb Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 17:00:27 +0900 Subject: sh: pci: Set the I/O port base to the SH7780 I/O window default. Presently the I/O port base isn't being set anywhere, which allows things like generic_inl() to blow up. Fix this up to point at the PCI IO window. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index fa73b0d..207b720 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -112,5 +112,7 @@ int __init sh7780_pcic_init(struct pci_channel *chan, word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO; pci_write_reg(chan, word, SH4_PCICR); + __set_io_port_base(SH7780_PCI_IO_BASE); + return 0; } -- cgit v0.10.2 From ab1363a8929f32cc163cd3f50ca72f20d901b00c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 17:07:47 +0900 Subject: sh: pci: Consolidate PCI I/O and mem window definitions for SH7780. This consolidates all of the PCI I/O and memory window definitions across the pci-sh7780 users in pci-sh7780 itself. No functional changes, in that every platform had exactly the same implementation. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c index 8ec6d22..044525d 100644 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ b/arch/sh/drivers/pci/ops-r7780rp.c @@ -26,27 +26,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) return irq_tab[slot]; } -static struct resource sh7780_io_resource = { - .name = "SH7780_IO", - .start = SH7780_PCI_IO_BASE, - .end = SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7780_mem_resource = { - .name = "SH7780_mem", - .start = SH7780_PCI_MEMORY_BASE, - .end = SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -extern struct pci_ops sh7780_pci_ops; - -struct pci_channel board_pci_channels[] = { - { sh7780_pci_init, &sh4_pci_ops, &sh7780_io_resource, &sh7780_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - static struct sh4_pci_address_map sh7780_pci_map = { .window0 = { .base = SH7780_CS2_BASE_ADDR, @@ -61,5 +40,5 @@ static struct sh4_pci_address_map sh7780_pci_map = { int __init pcibios_init_platform(void) { - return sh7780_pcic_init(&board_pci_channels[0], &sh7780_pci_map); + return sh7780_pcic_init(&sh7780_pci_map); } diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c index 6a0b7c0..570fd71 100644 --- a/arch/sh/drivers/pci/ops-sdk7780.c +++ b/arch/sh/drivers/pci/ops-sdk7780.c @@ -34,25 +34,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) return sdk7780_irq_tab[pin-1][slot]; } -static struct resource sdk7780_io_resource = { - .name = "SH7780_IO", - .start = SH7780_PCI_IO_BASE, - .end = SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sdk7780_mem_resource = { - .name = "SH7780_mem", - .start = SH7780_PCI_MEMORY_BASE, - .end = SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -struct pci_channel board_pci_channels[] = { - { sh7780_pci_init, &sh4_pci_ops, &sdk7780_io_resource, &sdk7780_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - static struct sh4_pci_address_map sdk7780_pci_map = { .window0 = { .base = SH7780_CS2_BASE_ADDR, @@ -67,5 +48,5 @@ static struct sh4_pci_address_map sdk7780_pci_map = { int __init pcibios_init_platform(void) { printk(KERN_INFO "SH7780 PCI: Finished initializing PCI controller\n"); - return sh7780_pcic_init(&board_pci_channels[0], &sdk7780_pci_map); + return sh7780_pcic_init(&sdk7780_pci_map); } diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c index 583b8e8..8b09415 100644 --- a/arch/sh/drivers/pci/ops-se7780.c +++ b/arch/sh/drivers/pci/ops-se7780.c @@ -41,27 +41,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) return se7780_irq_tab[pin-1][slot]; } -static struct resource se7780_io_resource = { - .name = "SH7780_IO", - .start = SH7780_PCI_IO_BASE, - .end = SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource se7780_mem_resource = { - .name = "SH7780_mem", - .start = SH7780_PCI_MEMORY_BASE, - .end = SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -extern struct pci_ops se7780_pci_ops; - -struct pci_channel board_pci_channels[] = { - { sh7780_pci_init, &sh4_pci_ops, &se7780_io_resource, &se7780_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - static struct sh4_pci_address_map se7780_pci_map = { .window0 = { .base = SH7780_CS2_BASE_ADDR, @@ -90,5 +69,5 @@ int __init pcibios_init_platform(void) ctrl_outw(0x0013, FPGA_PCI_INTSEL1); ctrl_outw(0xE402, FPGA_PCI_INTSEL2); - return sh7780_pcic_init(&board_pci_channels[0], &se7780_pci_map); + return sh7780_pcic_init(&se7780_pci_map); } diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c index ab0d1de..bac46b5 100644 --- a/arch/sh/drivers/pci/ops-sh7785lcr.c +++ b/arch/sh/drivers/pci/ops-sh7785lcr.c @@ -26,25 +26,6 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) return irq_tab[slot]; } -static struct resource sh7785_io_resource = { - .name = "SH7785_IO", - .start = SH7780_PCI_IO_BASE, - .end = SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7785_mem_resource = { - .name = "SH7785_mem", - .start = SH7780_PCI_MEMORY_BASE, - .end = SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -struct pci_channel board_pci_channels[] = { - { sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - static struct sh4_pci_address_map sh7785_pci_map = { .window0 = { #if defined(CONFIG_32BIT) @@ -59,5 +40,5 @@ static struct sh4_pci_address_map sh7785_pci_map = { int __init pcibios_init_platform(void) { - return sh7780_pcic_init(&board_pci_channels[0], &sh7785_pci_map); + return sh7780_pcic_init(&sh7785_pci_map); } diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 207b720..eb217dd 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -22,7 +22,7 @@ #include #include "pci-sh4.h" -int __init sh7780_pci_init(struct pci_channel *chan) +static int __init sh7780_pci_init(struct pci_channel *chan) { unsigned int id; const char *type = NULL; @@ -71,9 +71,28 @@ int __init sh7780_pci_init(struct pci_channel *chan) extern u8 pci_cache_line_size; -int __init sh7780_pcic_init(struct pci_channel *chan, - struct sh4_pci_address_map *map) +static struct resource sh7785_io_resource = { + .name = "SH7785_IO", + .start = SH7780_PCI_IO_BASE, + .end = SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1, + .flags = IORESOURCE_IO +}; + +static struct resource sh7785_mem_resource = { + .name = "SH7785_mem", + .start = SH7780_PCI_MEMORY_BASE, + .end = SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1, + .flags = IORESOURCE_MEM +}; + +struct pci_channel board_pci_channels[] = { + { sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff }, + { NULL, NULL, NULL, 0, 0 }, +}; + +int __init sh7780_pcic_init(struct sh4_pci_address_map *map) { + struct pci_channel *chan = &board_pci_channels[0]; u32 word; /* diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index 213f1d8..7a4f8a8 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -107,8 +107,6 @@ struct sh4_pci_address_map; /* arch/sh/drivers/pci/pci-sh7780.c */ -int sh7780_pci_init(struct pci_channel *chan); -int sh7780_pcic_init(struct pci_channel *chan, - struct sh4_pci_address_map *map); +int sh7780_pcic_init(struct sh4_pci_address_map *map); #endif /* _PCI_SH7780_H_ */ -- cgit v0.10.2 From 4c7a47de897e89c25a40e228ac5319cbac7257fe Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 17:21:36 +0900 Subject: sh: pci: Kill off platform-specific multi-window mappings. Commit 68b42d1b548be1840aff7122fdebeb804daf0fa3 ("sh: sh7785lcr: Map whole PCI address space.") changed around the semantics of how various chip-selects are made accessible to PCI. Now that there is a single large mapping covering from CS0-CS6, there is no longer any need to do multi-window mapping. Subsequently, all of the differing implementations can be consolidated in to pci-sh7780. Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-se/7780/irq.c b/arch/sh/boards/mach-se/7780/irq.c index 44b61a5..b8d43b6 100644 --- a/arch/sh/boards/mach-se/7780/irq.c +++ b/arch/sh/boards/mach-se/7780/irq.c @@ -49,4 +49,21 @@ void __init init_se7780_IRQ(void) /* ICR1: detect low level(for 2ndcut) */ ctrl_outl(0xAAAA0000, INTC_ICR1); + + /* + * FPGA PCISEL register initialize + * + * CPU || SLOT1 | SLOT2 | S-ATA | USB + * ------------------------------------- + * INTA || INTA | INTD | -- | INTB + * ------------------------------------- + * INTB || INTB | INTA | -- | INTC + * ------------------------------------- + * INTC || INTC | INTB | INTA | -- + * ------------------------------------- + * INTD || INTD | INTC | -- | INTA + * ------------------------------------- + */ + ctrl_outw(0x0013, FPGA_PCI_INTSEL1); + ctrl_outw(0xE402, FPGA_PCI_INTSEL2); } diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c index 044525d..4ea136e 100644 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ b/arch/sh/drivers/pci/ops-r7780rp.c @@ -25,20 +25,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { return irq_tab[slot]; } - -static struct sh4_pci_address_map sh7780_pci_map = { - .window0 = { - .base = SH7780_CS2_BASE_ADDR, - .size = 0x04000000, - }, - - .window1 = { - .base = SH7780_CS3_BASE_ADDR, - .size = 0x04000000, - }, -}; - -int __init pcibios_init_platform(void) -{ - return sh7780_pcic_init(&sh7780_pci_map); -} diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c index 570fd71..1d9a91b 100644 --- a/arch/sh/drivers/pci/ops-sdk7780.c +++ b/arch/sh/drivers/pci/ops-sdk7780.c @@ -33,20 +33,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { return sdk7780_irq_tab[pin-1][slot]; } - -static struct sh4_pci_address_map sdk7780_pci_map = { - .window0 = { - .base = SH7780_CS2_BASE_ADDR, - .size = 0x04000000, - }, - .window1 = { - .base = SH7780_CS3_BASE_ADDR, - .size = 0x04000000, - }, -}; - -int __init pcibios_init_platform(void) -{ - printk(KERN_INFO "SH7780 PCI: Finished initializing PCI controller\n"); - return sh7780_pcic_init(&sdk7780_pci_map); -} diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c index 8b09415..6c088cc 100644 --- a/arch/sh/drivers/pci/ops-se7780.c +++ b/arch/sh/drivers/pci/ops-se7780.c @@ -40,34 +40,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { return se7780_irq_tab[pin-1][slot]; } - -static struct sh4_pci_address_map se7780_pci_map = { - .window0 = { - .base = SH7780_CS2_BASE_ADDR, - .size = 0x04000000, - }, -}; - -int __init pcibios_init_platform(void) -{ - printk("SH7780 PCI: Finished initialization of the PCI controller\n"); - - /* - * FPGA PCISEL register initialize - * - * CPU || SLOT1 | SLOT2 | S-ATA | USB - * ------------------------------------- - * INTA || INTA | INTD | -- | INTB - * ------------------------------------- - * INTB || INTB | INTA | -- | INTC - * ------------------------------------- - * INTC || INTC | INTB | INTA | -- - * ------------------------------------- - * INTD || INTD | INTC | -- | INTA - * ------------------------------------- - */ - ctrl_outw(0x0013, FPGA_PCI_INTSEL1); - ctrl_outw(0xE402, FPGA_PCI_INTSEL2); - - return sh7780_pcic_init(&se7780_pci_map); -} diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c index bac46b5..0fe423e 100644 --- a/arch/sh/drivers/pci/ops-sh7785lcr.c +++ b/arch/sh/drivers/pci/ops-sh7785lcr.c @@ -25,20 +25,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { return irq_tab[slot]; } - -static struct sh4_pci_address_map sh7785_pci_map = { - .window0 = { -#if defined(CONFIG_32BIT) - .base = SH7780_32BIT_DDR_BASE_ADDR, - .size = 0x40000000, -#else - .base = SH7780_CS0_BASE_ADDR, - .size = 0x20000000, -#endif - }, -}; - -int __init pcibios_init_platform(void) -{ - return sh7780_pcic_init(&sh7785_pci_map); -} diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index eb217dd..07c5529 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -90,7 +90,19 @@ struct pci_channel board_pci_channels[] = { { NULL, NULL, NULL, 0, 0 }, }; -int __init sh7780_pcic_init(struct sh4_pci_address_map *map) +static struct sh4_pci_address_map sh7780_pci_map = { + .window0 = { +#if defined(CONFIG_32BIT) + .base = SH7780_32BIT_DDR_BASE_ADDR, + .size = 0x40000000, +#else + .base = SH7780_CS0_BASE_ADDR, + .size = 0x20000000, +#endif + }, +}; + +int __init pcibios_init_platform(void) { struct pci_channel *chan = &board_pci_channels[0]; u32 word; @@ -114,14 +126,10 @@ int __init sh7780_pcic_init(struct sh4_pci_address_map *map) /* Set IO and Mem windows to local address * Make PCI and local address the same for easy 1 to 1 mapping */ - pci_write_reg(chan, map->window0.size - 0xfffff, SH4_PCILSR0); - pci_write_reg(chan, map->window1.size - 0xfffff, SH4_PCILSR1); + pci_write_reg(chan, sh7780_pci_map.window0.size - 0xfffff, SH4_PCILSR0); /* Set the values on window 0 PCI config registers */ - pci_write_reg(chan, map->window0.base, SH4_PCILAR0); - pci_write_reg(chan, map->window0.base, SH7780_PCIMBAR0); - /* Set the values on window 1 PCI config registers */ - pci_write_reg(chan, map->window1.base, SH4_PCILAR1); - pci_write_reg(chan, map->window1.base, SH7780_PCIMBAR1); + pci_write_reg(chan, sh7780_pci_map.window0.base, SH4_PCILAR0); + pci_write_reg(chan, sh7780_pci_map.window0.base, SH7780_PCIMBAR0); /* Apply any last-minute PCIC fixups */ pci_fixup_pcic(chan); diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index 7a4f8a8..4b65d4b 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -104,9 +104,4 @@ #define SH7780_32BIT_DDR_BASE_ADDR 0x40000000 -struct sh4_pci_address_map; - -/* arch/sh/drivers/pci/pci-sh7780.c */ -int sh7780_pcic_init(struct sh4_pci_address_map *map); - #endif /* _PCI_SH7780_H_ */ -- cgit v0.10.2 From a6d377b6969235a3b5a6e87bdcef387d0976b41c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 20:11:44 +0900 Subject: sh: pci: Consolidate SH7780 PCIC IRQ routing. Now that the platform code is a bit leaner, we can start consolidating the various IRQ routing implementations. There are effectively only 2 variants, and the others can use those directly. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index 67d710a..5003971 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -16,11 +16,11 @@ obj-$(CONFIG_SH_DREAMCAST) += ops-dreamcast.o fixups-dreamcast.o obj-$(CONFIG_SH_SECUREEDGE5410) += ops-snapgear.o obj-$(CONFIG_SH_RTS7751R2D) += ops-rts7751r2d.o fixups-rts7751r2d.o obj-$(CONFIG_SH_SH03) += ops-sh03.o fixups-sh03.o -obj-$(CONFIG_SH_HIGHLANDER) += ops-r7780rp.o fixups-r7780rp.o -obj-$(CONFIG_SH_SDK7780) += ops-sdk7780.o fixups-sdk7780.o +obj-$(CONFIG_SH_HIGHLANDER) += fixups-r7780rp.o +obj-$(CONFIG_SH_SH7785LCR) += fixups-r7780rp.o +obj-$(CONFIG_SH_SDK7780) += fixups-sdk7780.o +obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o obj-$(CONFIG_SH_TITAN) += ops-titan.o obj-$(CONFIG_SH_LANDISK) += ops-landisk.o obj-$(CONFIG_SH_LBOX_RE2) += ops-lboxre2.o fixups-lboxre2.o -obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += ops-se7780.o fixups-sdk7780.o obj-$(CONFIG_SH_CAYMAN) += ops-cayman.o -obj-$(CONFIG_SH_SH7785LCR) += ops-sh7785lcr.o fixups-r7780rp.o diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c index 31f8dfb..864e92f 100644 --- a/arch/sh/drivers/pci/fixups-r7780rp.c +++ b/arch/sh/drivers/pci/fixups-r7780rp.c @@ -11,9 +11,17 @@ * for more details. */ #include +#include #include "pci-sh4.h" -#include +static char irq_tab[] __initdata = { + 65, 66, 67, 68, +}; + +int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) +{ + return irq_tab[slot]; +} int pci_fixup_pcic(struct pci_channel *chan) { pci_write_reg(chan, 0x000043ff, SH4_PCIINTM); diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c index 004efd4..da60e99 100644 --- a/arch/sh/drivers/pci/fixups-sdk7780.c +++ b/arch/sh/drivers/pci/fixups-sdk7780.c @@ -5,15 +5,32 @@ * * Copyright (C) 2003 Lineo uSolutions, Inc. * Copyright (C) 2004 - 2006 Paul Mundt + * Copyright (C) 2006 Nobuhiro Iwamatsu * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. */ #include +#include #include "pci-sh4.h" -#include +/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */ +static char sdk7780_irq_tab[4][16] __initdata = { + /* INTA */ + { 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + /* INTB */ + { 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + /* INTC */ + { 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, + /* INTD */ + { 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, +}; + +int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) +{ + return sdk7780_irq_tab[pin-1][slot]; +} int pci_fixup_pcic(struct pci_channel *chan) { /* Enable all interrupts, so we know what to fix */ diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c deleted file mode 100644 index 4ea136e..0000000 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Author: Ian DaSilva (idasilva@mvista.com) - * - * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * PCI initialization for the Renesas SH7780 Highlander R7780RP-1 board - */ -#include -#include -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -static char irq_tab[] __initdata = { - 65, 66, 67, 68, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - return irq_tab[slot]; -} diff --git a/arch/sh/drivers/pci/ops-sdk7780.c b/arch/sh/drivers/pci/ops-sdk7780.c deleted file mode 100644 index 1d9a91b..0000000 --- a/arch/sh/drivers/pci/ops-sdk7780.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * linux/arch/sh/drivers/pci/ops-sdk7780.c - * - * Copyright (C) 2006 Nobuhiro Iwamatsu - * - * PCI initialization for the SDK7780SE03 - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - */ -#include -#include -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */ -static char sdk7780_irq_tab[4][16] __initdata = { - /* INTA */ - { 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - /* INTB */ - { 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - /* INTC */ - { 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - /* INTD */ - { 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - return sdk7780_irq_tab[pin-1][slot]; -} diff --git a/arch/sh/drivers/pci/ops-se7780.c b/arch/sh/drivers/pci/ops-se7780.c deleted file mode 100644 index 6c088cc..0000000 --- a/arch/sh/drivers/pci/ops-se7780.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * linux/arch/sh/drivers/pci/ops-se7780.c - * - * Copyright (C) 2006 Nobuhiro Iwamatsu - * - * PCI initialization for the Hitachi UL Solution Engine 7780SE03 - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - */ -#include -#include -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -/* - * IDSEL = AD16 PCI slot - * IDSEL = AD17 PCI slot - * IDSEL = AD18 Serial ATA Controller (Silicon Image SiL3512A) - * IDSEL = AD19 USB Host Controller (NEC uPD7210100A) - */ - -/* IDSEL [16][17][18][19][20][21][22][23][24][25][26][27][28][29][30][31] */ -static char se7780_irq_tab[4][16] __initdata = { - /* INTA */ - { 65, 68, 67, 68, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - /* INTB */ - { 66, 65, -1, 65, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - /* INTC */ - { 67, 66, -1, 66, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, - /* INTD */ - { 68, 67, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - return se7780_irq_tab[pin-1][slot]; -} diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c deleted file mode 100644 index 0fe423e..0000000 --- a/arch/sh/drivers/pci/ops-sh7785lcr.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Author: Ian DaSilva (idasilva@mvista.com) - * - * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * PCI initialization for the Renesas R0P7785LC0011RL board - * Based on arch/sh/drivers/pci/ops-r7780rp.c - * - */ -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -static char irq_tab[] __initdata = { - 65, 66, 67, 68, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - return irq_tab[slot]; -} -- cgit v0.10.2 From 62c7ae87cb5962d3dfaa6d916a15e4faa9e07363 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 17 Apr 2009 20:37:16 +0900 Subject: sh: pci: Start unifying the SH7780 PCIC initialization. This starts moving out the common initialization bits from the various fixup paths in to the shared init path. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/fixups-r7780rp.c b/arch/sh/drivers/pci/fixups-r7780rp.c index 864e92f..15ca65c 100644 --- a/arch/sh/drivers/pci/fixups-r7780rp.c +++ b/arch/sh/drivers/pci/fixups-r7780rp.c @@ -22,33 +22,15 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { return irq_tab[slot]; } + int pci_fixup_pcic(struct pci_channel *chan) { pci_write_reg(chan, 0x000043ff, SH4_PCIINTM); - pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); - - pci_write_reg(chan, 0xfbb00047, SH7780_PCICMD); pci_write_reg(chan, 0x00000000, SH7780_PCIIBAR); - - pci_write_reg(chan, 0x00011912, SH7780_PCISVID); pci_write_reg(chan, 0x08000000, SH7780_PCICSCR0); pci_write_reg(chan, 0x0000001b, SH7780_PCICSAR0); pci_write_reg(chan, 0xfd000000, SH7780_PCICSCR1); pci_write_reg(chan, 0x0000000f, SH7780_PCICSAR1); - pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0); - pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0); - -#ifdef CONFIG_32BIT - pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2); - pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2); -#endif - - /* Set IOBR for windows containing area specified in pci.h */ - pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1), - SH7780_PCIIOBR); - pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)), - SH7780_PCIIOBMR); - return 0; } diff --git a/arch/sh/drivers/pci/fixups-sdk7780.c b/arch/sh/drivers/pci/fixups-sdk7780.c index da60e99..250b0ed 100644 --- a/arch/sh/drivers/pci/fixups-sdk7780.c +++ b/arch/sh/drivers/pci/fixups-sdk7780.c @@ -35,40 +35,18 @@ int pci_fixup_pcic(struct pci_channel *chan) { /* Enable all interrupts, so we know what to fix */ pci_write_reg(chan, 0x0000C3FF, SH7780_PCIIMR); - pci_write_reg(chan, 0x0000380F, SH7780_PCIAINTM); /* Set up standard PCI config registers */ - pci_write_reg(chan, 0xFB00, SH7780_PCISTATUS); - pci_write_reg(chan, 0x0047, SH7780_PCICMD); - pci_write_reg(chan, 0x00, SH7780_PCIPIF); - pci_write_reg(chan, 0x1912, SH7780_PCISVID); - pci_write_reg(chan, 0x0001, SH7780_PCISID); - pci_write_reg(chan, 0x08000000, SH7780_PCIMBAR0); /* PCI */ - pci_write_reg(chan, 0x08000000, SH7780_PCILAR0); /* SHwy */ - pci_write_reg(chan, 0x07F00001, SH7780_PCILSR); /* size 128M w/ MBAR */ + pci_write_reg(chan, 0x08000000, SH4_PCILAR0); /* SHwy */ + pci_write_reg(chan, 0x07F00001, SH4_PCILSR0); /* size 128M w/ MBAR */ pci_write_reg(chan, 0x00000000, SH7780_PCIMBAR1); - pci_write_reg(chan, 0x00000000, SH7780_PCILAR1); - pci_write_reg(chan, 0x00000000, SH7780_PCILSR1); + pci_write_reg(chan, 0x00000000, SH4_PCILAR1); + pci_write_reg(chan, 0x00000000, SH4_PCILSR1); pci_write_reg(chan, 0xAB000801, SH7780_PCIIBAR); - - /* - * Set the MBR so PCI address is one-to-one with window, - * meaning all calls go straight through... use ifdef to - * catch erroneous assumption. - */ - pci_write_reg(chan, 0xFD000000 , SH7780_PCIMBR0); - pci_write_reg(chan, 0x00FC0000 , SH7780_PCIMBMR0); /* 16M */ - - /* Set IOBR for window containing area specified in pci.h */ - pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1), - SH7780_PCIIOBR); - pci_write_reg(chan, (SH7780_PCI_IO_SIZE-1) & (7 << 18), - SH7780_PCIIOBMR); - - pci_write_reg(chan, 0xA5000C01, SH7780_PCICR); + pci_write_reg(chan, 0xA5000C01, SH4_PCICR); return 0; } diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 07c5529..f02d9df 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -1,19 +1,12 @@ /* - * Low-Level PCI Support for the SH7780 + * Low-Level PCI Support for the SH7780 * - * Dustin McIntire (dustin@sensoria.com) - * Derived from arch/i386/kernel/pci-*.c which bore the message: - * (c) 1999--2000 Martin Mares - * - * Ported to the new API by Paul Mundt - * With cleanup by Paul van Gool - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. + * Copyright (C) 2005 - 2009 Paul Mundt * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. */ -#undef DEBUG - #include #include #include @@ -117,13 +110,8 @@ int __init pcibios_init_platform(void) pci_cache_line_size = pci_read_reg(chan, SH7780_PCICLS) / 4; - /* set the command/status bits to: - * Wait Cycle Control + Parity Enable + Bus Master + - * Mem space enable - */ - pci_write_reg(chan, 0x00000046, SH7780_PCICMD); - - /* Set IO and Mem windows to local address + /* + * Set IO and Mem windows to local address * Make PCI and local address the same for easy 1 to 1 mapping */ pci_write_reg(chan, sh7780_pci_map.window0.size - 0xfffff, SH4_PCILSR0); @@ -131,9 +119,33 @@ int __init pcibios_init_platform(void) pci_write_reg(chan, sh7780_pci_map.window0.base, SH4_PCILAR0); pci_write_reg(chan, sh7780_pci_map.window0.base, SH7780_PCIMBAR0); + pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); + + /* Set up standard PCI config registers */ + __raw_writew(0xFB00, chan->reg_base + SH7780_PCISTATUS); + __raw_writew(0x0047, chan->reg_base + SH7780_PCICMD); + __raw_writew(0x1912, chan->reg_base + SH7780_PCISVID); + __raw_writew(0x0001, chan->reg_base + SH7780_PCISID); + + __raw_writeb(0x00, chan->reg_base + SH7780_PCIPIF); + /* Apply any last-minute PCIC fixups */ pci_fixup_pcic(chan); + pci_write_reg(chan, 0xfd000000, SH7780_PCIMBR0); + pci_write_reg(chan, 0x00fc0000, SH7780_PCIMBMR0); + +#ifdef CONFIG_32BIT + pci_write_reg(chan, 0xc0000000, SH7780_PCIMBR2); + pci_write_reg(chan, 0x20000000 - SH7780_PCI_IO_SIZE, SH7780_PCIMBMR2); +#endif + + /* Set IOBR for windows containing area specified in pci.h */ + pci_write_reg(chan, chan->io_resource->start & ~(SH7780_PCI_IO_SIZE-1), + SH7780_PCIIOBR); + pci_write_reg(chan, ((SH7780_PCI_IO_SIZE-1) & (7<<18)), + SH7780_PCIIOBMR); + /* SH7780 init done, set central function init complete */ /* use round robin mode to stop a device starving/overruning */ word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_FTO; diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index 4b65d4b..4a52478 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -65,11 +65,6 @@ #define SH7780_PCIPMCSR_BSE 0x046 #define SH7780_PCICDD 0x047 -#define SH7780_PCICR 0x100 /* PCI Control Register */ -#define SH7780_PCILSR 0x104 /* PCI Local Space Register0 */ -#define SH7780_PCILSR1 0x108 /* PCI Local Space Register1 */ -#define SH7780_PCILAR0 0x10C /* PCI Local Address Register1 */ -#define SH7780_PCILAR1 0x110 /* PCI Local Address Register1 */ #define SH7780_PCIIR 0x114 /* PCI Interrupt Register */ #define SH7780_PCIIMR 0x118 /* PCI Interrupt Mask Register */ #define SH7780_PCIAIR 0x11C /* Error Address Register */ -- cgit v0.10.2 From 93eb677d74a4f7d3edfb678c94f6c0544d9fbad2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 13:24:06 -0400 Subject: ftrace: use module notifier for function tracer The hooks in the module code for the function tracer must be called before any of that module code runs. The function tracer hooks modify the module (replacing calls to mcount to nops). If the code is executed while the change occurs, then the CPU can take a GPF. To handle the above with a bit of paranoia, I originally implemented the hooks as calls directly from the module code. After examining the notifier calls, it looks as though the start up notify is called before any of the module's code is executed. This makes the use of the notify safe with ftrace. Only the startup notify is required to be "safe". The shutdown simply removes the entries from the ftrace function list, and does not modify any code. This change has another benefit. It removes a issue with a reverse dependency in the mutexes of ftrace_lock and module_mutex. [ Impact: fix lock dependency bug, cleanup ] Cc: Rusty Russell Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 53869be..97c83e1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); -extern void ftrace_release(void *start, unsigned long size); - extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); #else @@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } -static inline void -ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) { } #endif /* diff --git a/include/linux/module.h b/include/linux/module.h index 6155fa4..a8f2c0a 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -341,6 +341,10 @@ struct module struct ftrace_event_call *trace_events; unsigned int num_trace_events; #endif +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + unsigned long *ftrace_callsites; + unsigned int num_ftrace_callsites; +#endif #ifdef CONFIG_MODULE_UNLOAD /* What modules depend on me? */ diff --git a/kernel/module.c b/kernel/module.c index a039470..2383e60 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1490,9 +1490,6 @@ static void free_module(struct module *mod) /* Free any allocated parameters. */ destroy_params(mod->kp, mod->num_kp); - /* release any pointers to mcount in this module */ - ftrace_release(mod->module_core, mod->core_size); - /* This may be NULL, but that's OK */ module_free(mod, mod->module_init); kfree(mod->args); @@ -1893,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod, unsigned int symindex = 0; unsigned int strindex = 0; unsigned int modindex, versindex, infoindex, pcpuindex; - unsigned int num_mcount; struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ - unsigned long *mseg; mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2179,7 +2174,13 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->trace_events), &mod->num_trace_events); #endif - +#ifdef CONFIG_FTRACE_MCOUNT_RECORD + /* sechdrs[0].sh_size is always zero */ + mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings, + "__mcount_loc", + sizeof(*mod->ftrace_callsites), + &mod->num_ftrace_callsites); +#endif #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) @@ -2244,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod, dynamic_debug_setup(debug, num_debug); } - /* sechdrs[0].sh_size is always zero */ - mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", - sizeof(*mseg), &num_mcount); - ftrace_init_module(mod, mseg, mseg + num_mcount); - err = module_finalize(hdr, sechdrs, mod); if (err < 0) goto cleanup; @@ -2309,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod, cleanup: kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); - ftrace_release(mod->module_core, mod->core_size); free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a234889..5b606f4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -916,30 +916,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec) rec->flags |= FTRACE_FL_FREE; } -void ftrace_release(void *start, unsigned long size) -{ - struct dyn_ftrace *rec; - struct ftrace_page *pg; - unsigned long s = (unsigned long)start; - unsigned long e = s + size; - - if (ftrace_disabled || !start) - return; - - mutex_lock(&ftrace_lock); - do_for_each_ftrace_rec(pg, rec) { - if ((rec->ip >= s) && (rec->ip < e)) { - /* - * rec->ip is changed in ftrace_free_rec() - * It should not between s and e if record was freed. - */ - FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); - ftrace_free_rec(rec); - } - } while_for_each_ftrace_rec(); - mutex_unlock(&ftrace_lock); -} - static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) { struct dyn_ftrace *rec; @@ -2752,14 +2728,72 @@ static int ftrace_convert_nops(struct module *mod, return 0; } -void ftrace_init_module(struct module *mod, - unsigned long *start, unsigned long *end) +#ifdef CONFIG_MODULES +void ftrace_release(void *start, void *end) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + unsigned long s = (unsigned long)start; + unsigned long e = (unsigned long)end; + + if (ftrace_disabled || !start || start == end) + return; + + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { + if ((rec->ip >= s) && (rec->ip < e)) { + /* + * rec->ip is changed in ftrace_free_rec() + * It should not between s and e if record was freed. + */ + FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE); + ftrace_free_rec(rec); + } + } while_for_each_ftrace_rec(); + mutex_unlock(&ftrace_lock); +} + +static void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; ftrace_convert_nops(mod, start, end); } +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct module *mod = data; + + switch (val) { + case MODULE_STATE_COMING: + ftrace_init_module(mod, mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + case MODULE_STATE_GOING: + ftrace_release(mod->ftrace_callsites, + mod->ftrace_callsites + + mod->num_ftrace_callsites); + break; + } + + return 0; +} +#else +static int ftrace_module_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +struct notifier_block ftrace_module_nb = { + .notifier_call = ftrace_module_notify, + .priority = 0, +}; + extern unsigned long __start_mcount_loc[]; extern unsigned long __stop_mcount_loc[]; @@ -2791,6 +2825,10 @@ void __init ftrace_init(void) __start_mcount_loc, __stop_mcount_loc); + ret = register_module_notifier(&ftrace_module_nb); + if (!ret) + pr_warning("Failed to register trace ftrace module notifier\n"); + return; failed: ftrace_disabled = 1; -- cgit v0.10.2 From e6187007d6c365b551c69ea3df46f06fd1c8bd19 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 13:36:40 -0400 Subject: tracing/events: add startup tests for events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As events start to become popular, and the new way to add tracing infrastructure into ftrace, it is important to catch any problems that might happen with a mistake in the TRACE_EVENT macro. This patch introduces a startup self test on the registered trace events. Note, it can only do a generic test, any type of testing that needs more involement is needed to be implemented by the tracepoint creators. The test goes down one by one enabling a trace point and running some random tasks (random in the sense that I just made them up). Those tasks are creating threads, grabbing mutexes and spinlocks and using workqueues. After testing each event individually, it does the same test after enabling each system of trace points. Like sched, irq, lockdep. Then finally it enables all tracepoints and performs the tasks again. The output to the console on bootup will look like this when everything works: Running tests on trace events: Testing event kfree_skb: OK Testing event kmalloc: OK Testing event kmem_cache_alloc: OK Testing event kmalloc_node: OK Testing event kmem_cache_alloc_node: OK Testing event kfree: OK Testing event kmem_cache_free: OK Testing event irq_handler_exit: OK Testing event irq_handler_entry: OK Testing event softirq_entry: OK Testing event softirq_exit: OK Testing event lock_acquire: OK Testing event lock_release: OK Testing event sched_kthread_stop: OK Testing event sched_kthread_stop_ret: OK Testing event sched_wait_task: OK Testing event sched_wakeup: OK Testing event sched_wakeup_new: OK Testing event sched_switch: OK Testing event sched_migrate_task: OK Testing event sched_process_free: OK Testing event sched_process_exit: OK Testing event sched_process_wait: OK Testing event sched_process_fork: OK Testing event sched_signal_send: OK Running tests on trace event systems: Testing event system skb: OK Testing event system kmem: OK Testing event system irq: OK Testing event system lockdep: OK Testing event system sched: OK Running tests on all trace events: Testing all events: OK [ folded in: tracing: add #include to fix build failure in test_work() This build failure occured on a few rare configs: kernel/trace/trace_events.c: In function ‘test_work’: kernel/trace/trace_events.c:975: error: implicit declaration of function ‘udelay’ kernel/trace/trace_events.c:980: error: implicit declaration of function ‘msleep’ delay.h is included in way too many other headers, hiding cases where new usage is added without header inclusion. [ Impact: build fix ] Signed-off-by: Ingo Molnar ] [ Impact: add event tracer self-tests ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6591d83..f81d6ee 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -8,10 +8,14 @@ * */ +#include +#include +#include #include #include #include #include +#include #include "trace_output.h" @@ -920,3 +924,177 @@ static __init int event_trace_init(void) return 0; } fs_initcall(event_trace_init); + +#ifdef CONFIG_FTRACE_STARTUP_TEST + +static DEFINE_SPINLOCK(test_spinlock); +static DEFINE_SPINLOCK(test_spinlock_irq); +static DEFINE_MUTEX(test_mutex); + +static __init void test_work(struct work_struct *dummy) +{ + spin_lock(&test_spinlock); + spin_lock_irq(&test_spinlock_irq); + udelay(1); + spin_unlock_irq(&test_spinlock_irq); + spin_unlock(&test_spinlock); + + mutex_lock(&test_mutex); + msleep(1); + mutex_unlock(&test_mutex); +} + +static __init int event_test_thread(void *unused) +{ + void *test_malloc; + + test_malloc = kmalloc(1234, GFP_KERNEL); + if (!test_malloc) + pr_info("failed to kmalloc\n"); + + schedule_on_each_cpu(test_work); + + kfree(test_malloc); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) + schedule(); + + return 0; +} + +/* + * Do various things that may trigger events. + */ +static __init void event_test_stuff(void) +{ + struct task_struct *test_thread; + + test_thread = kthread_run(event_test_thread, NULL, "test-events"); + msleep(1); + kthread_stop(test_thread); +} + +/* + * For every trace event defined, we will test each trace point separately, + * and then by groups, and finally all trace points. + */ +static __init int event_trace_self_tests(void) +{ + struct ftrace_event_call *call; + struct event_subsystem *system; + char *sysname; + int ret; + + pr_info("Running tests on trace events:\n"); + + list_for_each_entry(call, &ftrace_events, list) { + + /* Only test those that have a regfunc */ + if (!call->regfunc) + continue; + + pr_info("Testing event %s: ", call->name); + + /* + * If an event is already enabled, someone is using + * it and the self test should not be on. + */ + if (call->enabled) { + pr_warning("Enabled event during self test!\n"); + WARN_ON_ONCE(1); + continue; + } + + call->enabled = 1; + call->regfunc(); + + event_test_stuff(); + + call->unregfunc(); + call->enabled = 0; + + pr_cont("OK\n"); + } + + /* Now test at the sub system level */ + + pr_info("Running tests on trace event systems:\n"); + + list_for_each_entry(system, &event_subsystems, list) { + + /* the ftrace system is special, skip it */ + if (strcmp(system->name, "ftrace") == 0) + continue; + + pr_info("Testing event system %s: ", system->name); + + /* ftrace_set_clr_event can modify the name passed in. */ + sysname = kstrdup(system->name, GFP_KERNEL); + if (WARN_ON(!sysname)) { + pr_warning("Can't allocate memory, giving up!\n"); + return 0; + } + ret = ftrace_set_clr_event(sysname, 1); + kfree(sysname); + if (WARN_ON_ONCE(ret)) { + pr_warning("error enabling system %s\n", + system->name); + continue; + } + + event_test_stuff(); + + sysname = kstrdup(system->name, GFP_KERNEL); + if (WARN_ON(!sysname)) { + pr_warning("Can't allocate memory, giving up!\n"); + return 0; + } + ret = ftrace_set_clr_event(sysname, 0); + kfree(sysname); + + if (WARN_ON_ONCE(ret)) + pr_warning("error disabling system %s\n", + system->name); + + pr_cont("OK\n"); + } + + /* Test with all events enabled */ + + pr_info("Running tests on all trace events:\n"); + pr_info("Testing all events: "); + + sysname = kmalloc(4, GFP_KERNEL); + if (WARN_ON(!sysname)) { + pr_warning("Can't allocate memory, giving up!\n"); + return 0; + } + memcpy(sysname, "*:*", 4); + ret = ftrace_set_clr_event(sysname, 1); + if (WARN_ON_ONCE(ret)) { + kfree(sysname); + pr_warning("error enabling all events\n"); + return 0; + } + + event_test_stuff(); + + /* reset sysname */ + memcpy(sysname, "*:*", 4); + ret = ftrace_set_clr_event(sysname, 0); + kfree(sysname); + + if (WARN_ON_ONCE(ret)) { + pr_warning("error disabling all events\n"); + return 0; + } + + pr_cont("OK\n"); + + return 0; +} + +late_initcall(event_trace_self_tests); + +#endif -- cgit v0.10.2 From 6b87a91f5417226c7fe62100b0e7217e7096b789 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 17 Apr 2009 15:55:08 +0300 Subject: ASoC: TWL4030: Fix for the constraint handling The original implementation of the constraints were good against sane applications. If the opening sequence is: stream1_open, stream1_hw_params, stream2_open, stream2_hw_params -> the constraints are set correctly for stream2. But if the sequence is: stream1_open, stream2_open, stream2_hw_params, stream1_hw_params -> than stream2 would receive constraint rate = 0, sample_bits = 0, since the stream1 has not yet called hw_params... The command to trigger this event: gst-launch-0.10 alsasrc device=hw:0 ! alsasink device=hw:0 sync=false This patch does some 'black magic' in order to always set the correct constraints and sets it only when it is needed for the other stream. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index 921b205..a1b76d7 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -125,6 +125,11 @@ struct twl4030_priv { struct snd_pcm_substream *master_substream; struct snd_pcm_substream *slave_substream; + + unsigned int configured; + unsigned int rate; + unsigned int sample_bits; + unsigned int channels; }; /* @@ -1220,6 +1225,36 @@ static int twl4030_set_bias_level(struct snd_soc_codec *codec, return 0; } +static void twl4030_constraints(struct twl4030_priv *twl4030, + struct snd_pcm_substream *mst_substream) +{ + struct snd_pcm_substream *slv_substream; + + /* Pick the stream, which need to be constrained */ + if (mst_substream == twl4030->master_substream) + slv_substream = twl4030->slave_substream; + else if (mst_substream == twl4030->slave_substream) + slv_substream = twl4030->master_substream; + else /* This should not happen.. */ + return; + + /* Set the constraints according to the already configured stream */ + snd_pcm_hw_constraint_minmax(slv_substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + twl4030->rate, + twl4030->rate); + + snd_pcm_hw_constraint_minmax(slv_substream->runtime, + SNDRV_PCM_HW_PARAM_SAMPLE_BITS, + twl4030->sample_bits, + twl4030->sample_bits); + + snd_pcm_hw_constraint_minmax(slv_substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, + twl4030->channels, + twl4030->channels); +} + static int twl4030_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -1228,26 +1263,16 @@ static int twl4030_startup(struct snd_pcm_substream *substream, struct snd_soc_codec *codec = socdev->card->codec; struct twl4030_priv *twl4030 = codec->private_data; - /* If we already have a playback or capture going then constrain - * this substream to match it. - */ if (twl4030->master_substream) { - struct snd_pcm_runtime *master_runtime; - master_runtime = twl4030->master_substream->runtime; - - snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_RATE, - master_runtime->rate, - master_runtime->rate); - - snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_SAMPLE_BITS, - master_runtime->sample_bits, - master_runtime->sample_bits); - twl4030->slave_substream = substream; - } else + /* The DAI has one configuration for playback and capture, so + * if the DAI has been already configured then constrain this + * substream to match it. */ + if (twl4030->configured) + twl4030_constraints(twl4030, twl4030->master_substream); + } else { twl4030->master_substream = substream; + } return 0; } @@ -1264,6 +1289,13 @@ static void twl4030_shutdown(struct snd_pcm_substream *substream, twl4030->master_substream = twl4030->slave_substream; twl4030->slave_substream = NULL; + + /* If all streams are closed, or the remaining stream has not yet + * been configured than set the DAI as not configured. */ + if (!twl4030->master_substream) + twl4030->configured = 0; + else if (!twl4030->master_substream->runtime->channels) + twl4030->configured = 0; } static int twl4030_hw_params(struct snd_pcm_substream *substream, @@ -1276,8 +1308,8 @@ static int twl4030_hw_params(struct snd_pcm_substream *substream, struct twl4030_priv *twl4030 = codec->private_data; u8 mode, old_mode, format, old_format; - if (substream == twl4030->slave_substream) - /* Ignoring hw_params for slave substream */ + if (twl4030->configured) + /* Ignoring hw_params for already configured DAI */ return 0; /* bit rate */ @@ -1357,6 +1389,21 @@ static int twl4030_hw_params(struct snd_pcm_substream *substream, /* set CODECPDZ afterwards */ twl4030_codec_enable(codec, 1); } + + /* Store the important parameters for the DAI configuration and set + * the DAI as configured */ + twl4030->configured = 1; + twl4030->rate = params_rate(params); + twl4030->sample_bits = hw_param_interval(params, + SNDRV_PCM_HW_PARAM_SAMPLE_BITS)->min; + twl4030->channels = params_channels(params); + + /* If both playback and capture streams are open, and one of them + * is setting the hw parameters right now (since we are here), set + * constraints to the other stream to match the current one. */ + if (twl4030->slave_substream) + twl4030_constraints(twl4030, substream); + return 0; } -- cgit v0.10.2 From d1b182a8d49ed6416325b4e0a1cb0f17cd4e702a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 15 Apr 2009 16:53:47 -0400 Subject: tracing/events/ring-buffer: expose format of ring buffer headers to users Currently, every thing needed to read the binary output from the ring buffers is available, with the exception of the way the ring buffers handles itself internally. This patch creates two special files in the debugfs/tracing/events directory: # cat /debug/tracing/events/header_page field: u64 timestamp; offset:0; size:8; field: local_t commit; offset:8; size:8; field: char data; offset:16; size:4080; # cat /debug/tracing/events/header_event type : 2 bits len : 3 bits time_delta : 27 bits array : 32 bits padding : type == 0 time_extend : type == 1 data : type == 3 This is to allow a userspace app to see if the ring buffer format changes or not. [ Impact: allow userspace apps to know of ringbuffer format changes ] Signed-off-by: Steven Rostedt diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index f0aa486..fac8f1a 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -166,6 +166,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); +struct trace_seq; + +int ring_buffer_print_entry_header(struct trace_seq *s); +int ring_buffer_print_page_header(struct trace_seq *s); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f935bd5..84a6055 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -22,6 +22,28 @@ #include "trace.h" /* + * The ring buffer header is special. We must manually up keep it. + */ +int ring_buffer_print_entry_header(struct trace_seq *s) +{ + int ret; + + ret = trace_seq_printf(s, "\ttype : 2 bits\n"); + ret = trace_seq_printf(s, "\tlen : 3 bits\n"); + ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); + ret = trace_seq_printf(s, "\tarray : 32 bits\n"); + ret = trace_seq_printf(s, "\n"); + ret = trace_seq_printf(s, "\tpadding : type == %d\n", + RINGBUF_TYPE_PADDING); + ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", + RINGBUF_TYPE_TIME_EXTEND); + ret = trace_seq_printf(s, "\tdata : type == %d\n", + RINGBUF_TYPE_DATA); + + return ret; +} + +/* * The ring buffer is made up of a list of pages. A separate list of pages is * allocated for each CPU. A writer may only write to a buffer that is * associated with the CPU it is currently executing on. A reader may read @@ -340,6 +362,28 @@ static inline int test_time_stamp(u64 delta) #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) +int ring_buffer_print_page_header(struct trace_seq *s) +{ + struct buffer_data_page field; + int ret; + + ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" + "offset:0;\tsize:%u;\n", + (unsigned int)sizeof(field.time_stamp)); + + ret = trace_seq_printf(s, "\tfield: local_t commit;\t" + "offset:%u;\tsize:%u;\n", + (unsigned int)offsetof(typeof(field), commit), + (unsigned int)sizeof(field.commit)); + + ret = trace_seq_printf(s, "\tfield: char data;\t" + "offset:%u;\tsize:%u;\n", + (unsigned int)offsetof(typeof(field), data), + (unsigned int)BUF_PAGE_SIZE); + + return ret; +} + /* * head_page == tail_page && head == tail then buffer is empty. */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f81d6ee..7163a2b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -610,6 +610,30 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + int (*func)(struct trace_seq *s) = filp->private_data; + struct trace_seq *s; + int r; + + if (*ppos) + return 0; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + trace_seq_init(s); + + func(s); + r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); + + kfree(s); + + return r; +} + static const struct seq_operations show_event_seq_ops = { .start = t_start, .next = t_next, @@ -667,6 +691,11 @@ static const struct file_operations ftrace_subsystem_filter_fops = { .write = subsystem_filter_write, }; +static const struct file_operations ftrace_show_header_fops = { + .open = tracing_open_generic, + .read = show_header, +}; + static struct dentry *event_trace_events_dir(void) { static struct dentry *d_tracer; @@ -909,6 +938,15 @@ static __init int event_trace_init(void) if (!d_events) return 0; + /* ring buffer internal formats */ + trace_create_file("header_page", 0444, d_events, + ring_buffer_print_page_header, + &ftrace_show_header_fops); + + trace_create_file("header_event", 0444, d_events, + ring_buffer_print_entry_header, + &ftrace_show_header_fops); + for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { /* The linker may leave blanks */ if (!call->name) -- cgit v0.10.2 From 69abe6a5d18a9394baa325bab8f57748b037c517 Mon Sep 17 00:00:00 2001 From: Avadh Patel Date: Fri, 10 Apr 2009 16:04:48 -0400 Subject: tracing: add saved_cmdlines file to show cached task comms Export the cached task comms to userspace. This allows user apps to translate the pids from a trace into their respective task command lines. [ Impact: let userspace apps reading binary buffer know comm's of pids ] Signed-off-by: Avadh Patel [ added error checking and use of buf pointer to index file_buf ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2d69b26..031c46f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2422,6 +2422,56 @@ static const struct file_operations tracing_readme_fops = { }; static ssize_t +tracing_saved_cmdlines_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf_comm; + char *file_buf; + char *buf; + int len = 0; + int pid; + int i; + + file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL); + if (!file_buf) + return -ENOMEM; + + buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL); + if (!buf_comm) { + kfree(file_buf); + return -ENOMEM; + } + + buf = file_buf; + + for (i = 0; i < SAVED_CMDLINES; i++) { + int r; + + pid = map_cmdline_to_pid[i]; + if (pid == -1 || pid == NO_CMDLINE_MAP) + continue; + + trace_find_cmdline(pid, buf_comm); + r = sprintf(buf, "%d %s\n", pid, buf_comm); + buf += r; + len += r; + } + + len = simple_read_from_buffer(ubuf, cnt, ppos, + file_buf, len); + + kfree(file_buf); + kfree(buf_comm); + + return len; +} + +static const struct file_operations tracing_saved_cmdlines_fops = { + .open = tracing_open_generic, + .read = tracing_saved_cmdlines_read, +}; + +static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -3973,6 +4023,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("trace_marker", 0220, d_tracer, NULL, &tracing_mark_fops); + trace_create_file("saved_cmdlines", 0444, d_tracer, + NULL, &tracing_saved_cmdlines_fops); + #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); -- cgit v0.10.2 From 9ea21c1ecdb35ecdcac5fd9d95f62a1f6a7ffec0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 16 Apr 2009 12:15:44 -0400 Subject: tracing/events: perform function tracing in event selftests We can find some bugs in the trace events if we stress the writes as well. The function tracer is a good way to stress the events. [ Impact: extend scope of event tracer self-tests ] Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <20090416161746.604786131@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 7163a2b..1137f95 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1017,7 +1017,7 @@ static __init void event_test_stuff(void) * For every trace event defined, we will test each trace point separately, * and then by groups, and finally all trace points. */ -static __init int event_trace_self_tests(void) +static __init void event_trace_self_tests(void) { struct ftrace_event_call *call; struct event_subsystem *system; @@ -1071,7 +1071,7 @@ static __init int event_trace_self_tests(void) sysname = kstrdup(system->name, GFP_KERNEL); if (WARN_ON(!sysname)) { pr_warning("Can't allocate memory, giving up!\n"); - return 0; + return; } ret = ftrace_set_clr_event(sysname, 1); kfree(sysname); @@ -1086,7 +1086,7 @@ static __init int event_trace_self_tests(void) sysname = kstrdup(system->name, GFP_KERNEL); if (WARN_ON(!sysname)) { pr_warning("Can't allocate memory, giving up!\n"); - return 0; + return; } ret = ftrace_set_clr_event(sysname, 0); kfree(sysname); @@ -1106,14 +1106,14 @@ static __init int event_trace_self_tests(void) sysname = kmalloc(4, GFP_KERNEL); if (WARN_ON(!sysname)) { pr_warning("Can't allocate memory, giving up!\n"); - return 0; + return; } memcpy(sysname, "*:*", 4); ret = ftrace_set_clr_event(sysname, 1); if (WARN_ON_ONCE(ret)) { kfree(sysname); pr_warning("error enabling all events\n"); - return 0; + return; } event_test_stuff(); @@ -1125,10 +1125,76 @@ static __init int event_trace_self_tests(void) if (WARN_ON_ONCE(ret)) { pr_warning("error disabling all events\n"); - return 0; + return; } pr_cont("OK\n"); +} + +#ifdef CONFIG_FUNCTION_TRACER + +static DEFINE_PER_CPU(atomic_t, test_event_disable); + +static void +function_test_events_call(unsigned long ip, unsigned long parent_ip) +{ + struct ring_buffer_event *event; + struct ftrace_entry *entry; + unsigned long flags; + long disabled; + int resched; + int cpu; + int pc; + + pc = preempt_count(); + resched = ftrace_preempt_disable(); + cpu = raw_smp_processor_id(); + disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu)); + + if (disabled != 1) + goto out; + + local_save_flags(flags); + + event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry), + flags, pc); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + entry->ip = ip; + entry->parent_ip = parent_ip; + + trace_current_buffer_unlock_commit(event, flags, pc); + + out: + atomic_dec(&per_cpu(test_event_disable, cpu)); + ftrace_preempt_enable(resched); +} + +static struct ftrace_ops trace_ops __initdata = +{ + .func = function_test_events_call, +}; + +static __init void event_trace_self_test_with_function(void) +{ + register_ftrace_function(&trace_ops); + pr_info("Running tests again, along with the function tracer\n"); + event_trace_self_tests(); + unregister_ftrace_function(&trace_ops); +} +#else +static __init void event_trace_self_test_with_function(void) +{ +} +#endif + +static __init int event_trace_self_tests_init(void) +{ + + event_trace_self_tests(); + + event_trace_self_test_with_function(); return 0; } -- cgit v0.10.2 From 76aa81118ddfbb3dc31533030cf3ec329dd067a6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 16 Apr 2009 23:35:39 -0700 Subject: tracing: avoid warnings from zero-arg tracepoints Tracepoints with no arguments can issue two warnings: "field" defined by not used "ret" is uninitialized in this function Mark field as being OK to leave unused, and initialize ret. [ Impact: fix false positive compiler warnings. ] Signed-off-by: Jeremy Fitzhardinge Acked-by: Steven Rostedt Cc: mathieu.desnoyers@polymtl.ca LKML-Reference: <1239950139-1119-5-git-send-email-jeremy@goop.org> Signed-off-by: Ingo Molnar diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 60c5323..39a3351 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -160,8 +160,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ static int \ ftrace_format_##call(struct trace_seq *s) \ { \ - struct ftrace_raw_##call field; \ - int ret; \ + struct ftrace_raw_##call field __attribute__((unused)); \ + int ret = 0; \ \ tstruct; \ \ -- cgit v0.10.2 From 339ae5d3c3fc2025e3657637921495fd600027c7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 17 Apr 2009 10:34:30 +0800 Subject: tracing: fix file mode of trace and README trace is read-write and README is read-only. [ Impact: fix /debug/tracing/ file permissions. ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Acked-by: Steven Rostedt LKML-Reference: <49E7EAB6.4070605@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 031c46f..f681f64 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4002,7 +4002,7 @@ static __init int tracer_init_debugfs(void) trace_create_file("available_tracers", 0444, d_tracer, &global_trace, &show_traces_fops); - trace_create_file("current_tracer", 0444, d_tracer, + trace_create_file("current_tracer", 0644, d_tracer, &global_trace, &set_tracer_fops); trace_create_file("tracing_max_latency", 0644, d_tracer, @@ -4011,7 +4011,7 @@ static __init int tracer_init_debugfs(void) trace_create_file("tracing_thresh", 0644, d_tracer, &tracing_thresh, &tracing_max_lat_fops); - trace_create_file("README", 0644, d_tracer, + trace_create_file("README", 0444, d_tracer, NULL, &tracing_readme_fops); trace_create_file("trace_pipe", 0444, d_tracer, -- cgit v0.10.2 From 9b94b3a19b13e094c10f65f24bc358f6ffe4eacd Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 17 Apr 2009 12:07:46 +0200 Subject: x86: fixup numa_node information for AMD CPU northbridge functions Currently the numa_node attribute for these PCI devices is 0 (it corresponds to the numa_node for PCI bus 0). This is not a big issue but incorrect. This inconsistency can be fixed by reading the node number from CPU NB function 0. [ Impact: fill in dev->numa_node information, to optimize DMA allocations ] Signed-off-by: Andreas Herrmann Cc: jbarnes@virtuousgeek.org LKML-Reference: <20090417100746.GG16198@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index e95022e..94ad0c0 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -493,5 +493,42 @@ void force_hpet_resume(void) break; } } +#endif + +#if defined(CONFIG_PCI) && defined(CONFIG_NUMA) +/* Set correct numa_node information for AMD NB functions */ +static void __init quirk_amd_nb_node(struct pci_dev *dev) +{ + struct pci_dev *nb_ht; + unsigned int devfn; + u32 val; + + devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); + nb_ht = pci_get_slot(dev->bus, devfn); + if (!nb_ht) + return; + + pci_read_config_dword(nb_ht, 0x60, &val); + set_dev_node(&dev->dev, val & 7); + pci_dev_put(dev); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC, + quirk_amd_nb_node); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, + quirk_amd_nb_node); #endif -- cgit v0.10.2 From 46de405f25f1d9fa73b657ffbb752aa0cc87a91d Mon Sep 17 00:00:00 2001 From: Zhaolei Date: Fri, 17 Apr 2009 10:53:43 +0800 Subject: tracing: Remove include/trace/kmem_event_types.h kmem_event_types.h is no longer necessary since tracepoint definitions are put into include/trace/events/kmem.h [ Impact: remove now-unused file. ] Signed-off-by: Zhao Lei Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <49E7EF37.2080205@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/kmem_event_types.h b/include/trace/kmem_event_types.h deleted file mode 100644 index 4ff420f..0000000 --- a/include/trace/kmem_event_types.h +++ /dev/null @@ -1,193 +0,0 @@ - -/* use instead */ -#ifndef TRACE_EVENT -# error Do not include this file directly. -# error Unless you know what you are doing. -#endif - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - -TRACE_EVENT(kmalloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmem_cache_alloc, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags) -); - -TRACE_EVENT(kmalloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kmem_cache_alloc_node, - - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), - - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), - - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%08x node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - __entry->gfp_flags, - __entry->node) -); - -TRACE_EVENT(kfree, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -TRACE_EVENT(kmem_cache_free, - - TP_PROTO(unsigned long call_site, const void *ptr), - - TP_ARGS(call_site, ptr), - - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), - - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), - - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) -); - -#undef TRACE_SYSTEM -- cgit v0.10.2 From ac1adc55fc71c7515caa2eb0e63e49b3d1c6a47c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 17 Apr 2009 00:27:08 -0500 Subject: tracing/filters: add filter_mutex to protect filter predicates This patch adds a filter_mutex to prevent the filter predicates from being accessed concurrently by various external functions. It's based on a previous patch by Li Zefan: "[PATCH 7/7] tracing/filters: make filter preds RCU safe" v2 changes: - fixed wrong value returned in a add_subsystem_pred() failure case noticed by Li Zefan. [ Impact: fix trace filter corruption/crashes on parallel access ] Signed-off-by: Tom Zanussi Reviewed-by: Li Zefan Tested-by: Li Zefan Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: paulmck@linux.vnet.ibm.com LKML-Reference: <1239946028.6639.13.camel@tropicana> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8817c18..247948e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -757,13 +757,15 @@ struct filter_pred { }; extern void filter_free_pred(struct filter_pred *pred); -extern void filter_print_preds(struct filter_pred **preds, int n_preds, +extern void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s); extern int filter_parse(char **pbuf, struct filter_pred *pred); extern int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred); extern void filter_disable_preds(struct ftrace_event_call *call); extern void filter_free_subsystem_preds(struct event_subsystem *system); +extern void filter_print_subsystem_preds(struct event_subsystem *system, + struct trace_seq *s); extern int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1137f95..64f9d6d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -488,7 +488,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(call->preds, call->n_preds, s); + filter_print_preds(call, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -558,7 +558,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(system->preds, system->n_preds, s); + filter_print_subsystem_preds(system, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f8e5eab..e0fcfd2 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -22,10 +22,13 @@ #include #include #include +#include #include "trace.h" #include "trace_output.h" +static DEFINE_MUTEX(filter_mutex); + static int filter_pred_64(struct filter_pred *pred, void *event) { u64 *addr = (u64 *)(event + pred->offset); @@ -112,8 +115,8 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) } EXPORT_SYMBOL_GPL(filter_match_preds); -void filter_print_preds(struct filter_pred **preds, int n_preds, - struct trace_seq *s) +static void __filter_print_preds(struct filter_pred **preds, int n_preds, + struct trace_seq *s) { char *field_name; struct filter_pred *pred; @@ -138,6 +141,21 @@ void filter_print_preds(struct filter_pred **preds, int n_preds, } } +void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s) +{ + mutex_lock(&filter_mutex); + __filter_print_preds(call->preds, call->n_preds, s); + mutex_unlock(&filter_mutex); +} + +void filter_print_subsystem_preds(struct event_subsystem *system, + struct trace_seq *s) +{ + mutex_lock(&filter_mutex); + __filter_print_preds(system->preds, system->n_preds, s); + mutex_unlock(&filter_mutex); +} + static struct ftrace_event_field * find_event_field(struct ftrace_event_call *call, char *name) { @@ -180,7 +198,7 @@ static int filter_set_pred(struct filter_pred *dest, return 0; } -void filter_disable_preds(struct ftrace_event_call *call) +static void __filter_disable_preds(struct ftrace_event_call *call) { int i; @@ -190,6 +208,13 @@ void filter_disable_preds(struct ftrace_event_call *call) call->preds[i]->fn = filter_pred_none; } +void filter_disable_preds(struct ftrace_event_call *call) +{ + mutex_lock(&filter_mutex); + __filter_disable_preds(call); + mutex_unlock(&filter_mutex); +} + int init_preds(struct ftrace_event_call *call) { struct filter_pred *pred; @@ -223,7 +248,7 @@ oom: } EXPORT_SYMBOL_GPL(init_preds); -void filter_free_subsystem_preds(struct event_subsystem *system) +static void __filter_free_subsystem_preds(struct event_subsystem *system) { struct ftrace_event_call *call; int i; @@ -241,18 +266,25 @@ void filter_free_subsystem_preds(struct event_subsystem *system) continue; if (!strcmp(call->system, system->name)) - filter_disable_preds(call); + __filter_disable_preds(call); } } -static int __filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred, - filter_pred_fn_t fn) +void filter_free_subsystem_preds(struct event_subsystem *system) +{ + mutex_lock(&filter_mutex); + __filter_free_subsystem_preds(system); + mutex_unlock(&filter_mutex); +} + +static int filter_add_pred_fn(struct ftrace_event_call *call, + struct filter_pred *pred, + filter_pred_fn_t fn) { int idx, err; if (call->n_preds && !pred->compound) - filter_disable_preds(call); + __filter_disable_preds(call); if (call->n_preds == MAX_FILTER_PRED) return -ENOSPC; @@ -276,7 +308,8 @@ static int is_string_field(const char *type) return 0; } -int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) +static int __filter_add_pred(struct ftrace_event_call *call, + struct filter_pred *pred) { struct ftrace_event_field *field; filter_pred_fn_t fn; @@ -293,7 +326,7 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) return -EINVAL; fn = filter_pred_string; pred->str_len = field->size; - return __filter_add_pred(call, pred, fn); + return filter_add_pred_fn(call, pred, fn); } else { if (pred->str_len) return -EINVAL; @@ -316,7 +349,18 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) return -EINVAL; } - return __filter_add_pred(call, pred, fn); + return filter_add_pred_fn(call, pred, fn); +} + +int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) +{ + int err; + + mutex_lock(&filter_mutex); + err = __filter_add_pred(call, pred); + mutex_unlock(&filter_mutex); + + return err; } int filter_add_subsystem_pred(struct event_subsystem *system, @@ -324,20 +368,27 @@ int filter_add_subsystem_pred(struct event_subsystem *system, { struct ftrace_event_call *call; + mutex_lock(&filter_mutex); + if (system->n_preds && !pred->compound) - filter_free_subsystem_preds(system); + __filter_free_subsystem_preds(system); if (!system->n_preds) { system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!system->preds) + if (!system->preds) { + mutex_unlock(&filter_mutex); return -ENOMEM; + } } - if (system->n_preds == MAX_FILTER_PRED) + if (system->n_preds == MAX_FILTER_PRED) { + mutex_unlock(&filter_mutex); return -ENOSPC; + } system->preds[system->n_preds] = pred; + system->n_preds++; list_for_each_entry(call, &ftrace_events, list) { int err; @@ -348,17 +399,16 @@ int filter_add_subsystem_pred(struct event_subsystem *system, if (strcmp(call->system, system->name)) continue; - if (!find_event_field(call, pred->field_name)) - continue; - - err = filter_add_pred(call, pred); + err = __filter_add_pred(call, pred); if (err == -ENOMEM) { system->preds[system->n_preds] = NULL; + system->n_preds--; + mutex_unlock(&filter_mutex); return err; } } - system->n_preds++; + mutex_unlock(&filter_mutex); return 0; } -- cgit v0.10.2 From b0afdc126d0515e76890f0a5f26b28501cfa298e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 13:02:22 -0400 Subject: tracing/events: enable code with EVENT_TRACING not EVENT_TRACER The CONFIG_EVENT_TRACER is the way to turn on event tracing when no other tracing has been configured. All code to get enabled should depend on CONFIG_EVENT_TRACING. That is what is enabled when TRACING (or CONFIG_EVENT_TRACER) is selected. This patch enables the include/trace/ftrace.h file when CONFIG_EVENT_TRACING is enabled. [ Impact: fix warning in event tracer selftest ] Signed-off-by: Steven Rostedt diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 1886941..7f1f23d 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,7 +56,7 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#ifdef CONFIG_EVENT_TRACER +#ifdef CONFIG_EVENT_TRACING #include #endif -- cgit v0.10.2 From 12acd473d45cf2e40de3782cb2de712e5cd4d715 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 16:01:56 -0400 Subject: tracing: add EXPORT_SYMBOL_GPL for trace commits Not all the necessary symbols were exported to allow for tracing by modules. This patch adds them in. [ Impact: allow modules to commit data to the ring buffer ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f681f64..183d788 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -894,18 +894,20 @@ void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, { __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); } +EXPORT_SYMBOL(trace_current_buffer_unlock_commit); void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc) { __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); } +EXPORT_SYMBOL(trace_nowake_buffer_unlock_commit); void trace_current_buffer_discard_commit(struct ring_buffer_event *event) { ring_buffer_discard_commit(global_trace.buffer, event); } -EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit); +EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit); void trace_function(struct trace_array *tr, -- cgit v0.10.2 From 261842b7c9099f56de2eb969c8ad65402d68e00e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 16 Apr 2009 21:41:52 -0400 Subject: tracing: add same level recursion detection The tracing infrastructure allows for recursion. That is, an interrupt may interrupt the act of tracing an event, and that interrupt may very well perform its own trace. This is a recursive trace, and is fine to do. The problem arises when there is a bug, and the utility doing the trace calls something that recurses back into the tracer. This recursion is not caused by an external event like an interrupt, but by code that is not expected to recurse. The result could be a lockup. This patch adds a bitmask to the task structure that keeps track of the trace recursion. To find the interrupt depth, the following algorithm is used: level = hardirq_count() + softirq_count() + in_nmi; Here, level will be the depth of interrutps and softirqs, and even handles the nmi. Then the corresponding bit is set in the recursion bitmask. If the bit was already set, we know we had a recursion at the same level and we warn about it and fail the writing to the buffer. After the data has been committed to the buffer, we clear the bit. No atomics are needed. The only races are with interrupts and they reset the bitmask before returning anywy. [ Impact: detect same irq level trace recursion ] Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 97c83e1..39b95c5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -488,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk) extern int ftrace_dump_on_oops; +#ifdef CONFIG_PREEMPT +#define INIT_TRACE_RECURSION .trace_recursion = 0, +#endif + #endif /* CONFIG_TRACING */ +#ifndef INIT_TRACE_RECURSION +#define INIT_TRACE_RECURSION +#endif #ifdef CONFIG_HW_BRANCH_TRACER diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dcfb933..6fc2185 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -187,6 +187,7 @@ extern struct cred init_cred; INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ + INIT_TRACE_RECURSION \ } diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc..7ede5e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1428,7 +1428,9 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; -#endif + /* bitmask of trace recursion */ + unsigned long trace_recursion; +#endif /* CONFIG_TRACING */ }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 84a6055..b421b0e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1481,6 +1481,40 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, return event; } +static int trace_irq_level(void) +{ + return hardirq_count() + softirq_count() + in_nmi(); +} + +static int trace_recursive_lock(void) +{ + int level; + + level = trace_irq_level(); + + if (unlikely(current->trace_recursion & (1 << level))) { + /* Disable all tracing before we do anything else */ + tracing_off_permanent(); + WARN_ON_ONCE(1); + return -1; + } + + current->trace_recursion |= 1 << level; + + return 0; +} + +static void trace_recursive_unlock(void) +{ + int level; + + level = trace_irq_level(); + + WARN_ON_ONCE(!current->trace_recursion & (1 << level)); + + current->trace_recursion &= ~(1 << level); +} + static DEFINE_PER_CPU(int, rb_need_resched); /** @@ -1514,6 +1548,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) /* If we are tracing schedule, we don't want to recurse */ resched = ftrace_preempt_disable(); + if (trace_recursive_lock()) + goto out_nocheck; + cpu = raw_smp_processor_id(); if (!cpumask_test_cpu(cpu, buffer->cpumask)) @@ -1543,6 +1580,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) return event; out: + trace_recursive_unlock(); + + out_nocheck: ftrace_preempt_enable(resched); return NULL; } @@ -1581,6 +1621,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, rb_commit(cpu_buffer, event); + trace_recursive_unlock(); + /* * Only the last preempt count needs to restore preemption. */ -- cgit v0.10.2 From 3189cdb31622f4e40688ce5a6fc5d940b42bc805 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 16:13:55 -0400 Subject: tracing: protect trace_printk from recursion trace_printk can be called from any context, including NMIs. If this happens, then we must test for for recursion before grabbing any spinlocks. This patch prevents trace_printk from being called recursively. [ Impact: prevent hard lockup in lockdep event tracer ] Cc: Peter Zijlstra Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 183d788..b9a3adc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1259,6 +1259,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) struct trace_array_cpu *data; struct bprint_entry *entry; unsigned long flags; + int disable; int resched; int cpu, len = 0, size, pc; @@ -1273,7 +1274,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(atomic_read(&data->disabled))) + disable = atomic_inc_return(&data->disabled); + if (unlikely(disable != 1)) goto out; /* Lockdep uses trace_printk for lock tracing */ @@ -1301,6 +1303,7 @@ out_unlock: local_irq_restore(flags); out: + atomic_dec_return(&data->disabled); ftrace_preempt_enable(resched); unpause_graph_tracing(); @@ -1320,6 +1323,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) int cpu, len = 0, size, pc; struct print_entry *entry; unsigned long irq_flags; + int disable; if (tracing_disabled || tracing_selftest_running) return 0; @@ -1329,7 +1333,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) cpu = raw_smp_processor_id(); data = tr->data[cpu]; - if (unlikely(atomic_read(&data->disabled))) + disable = atomic_inc_return(&data->disabled); + if (unlikely(disable != 1)) goto out; pause_graph_tracing(); @@ -1357,6 +1362,7 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args) raw_local_irq_restore(irq_flags); unpause_graph_tracing(); out: + atomic_dec_return(&data->disabled); preempt_enable_notrace(); return len; -- cgit v0.10.2 From 8e668b5b3455207e4540fc7ccab9ecf70142f288 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 17 Apr 2009 17:17:55 -0400 Subject: tracing: remove format attribute of inline function Due to a cut and paste error, I added the gcc attribute for printf format to the static inline stub of trace_seq_printf. This will cause a compile failure. [ Impact: fix compiler error when CONFIG_TRACING is off ] Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: =?ISO-8859-15?Q?Fr=E9d=E9ric_Weisbecker?= LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 15ca2c7..37db9bd 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -42,7 +42,6 @@ extern int trace_seq_path(struct trace_seq *s, struct path *path); #else /* CONFIG_TRACING */ static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))) { return 0; } -- cgit v0.10.2 From 6f41469c627fe118121dfce2a8134ad24da3df28 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: block: clear req->errors on bio completion only for fs requests Impact: subtle behavior change For fs requests, rq is only carrier of bios and rq error status as a whole doesn't mean much. This is the reason why rq->errors is being cleared on each partial completion of a request as on each partial completion the error status is transferred to the respective bios. For pc requests, rq->errors is used to carry error status to the issuer and thus __end_that_request_first() doesn't clear it on such cases. The condition was fine till now as only fs and pc requests have used bio and thus the bio completion path. However, future changes will unify data accesses to bio and all non fs users care about rq error status. Clear rq->errors on bio completion only for fs requests. In general, the implicit clearing is a bit too subtle especially as the meaning of rq->errors is completely dependent on low level drivers. Unifying / cleaning up rq->errors usage and letting llds manage it would be better. TODO comment added. Signed-off-by: Tejun Heo Acked-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index 07ab754..cce7a88 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1739,10 +1739,14 @@ static int __end_that_request_first(struct request *req, int error, trace_block_rq_complete(req->q, req); /* - * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual - * sense key with us all the way through + * For fs requests, rq is just carrier of independent bio's + * and each partial completion should be handled separately. + * Reset per-request error on each partial completion. + * + * TODO: tj: This is too subtle. It would be better to let + * low level drivers do what they see fit. */ - if (!blk_pc_request(req)) + if (blk_fs_request(req)) req->errors = 0; if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { -- cgit v0.10.2 From 1e75540ec5202cae63cd238c86bd880e3d496546 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide-tape: remove back-to-back REQUEST_SENSE detection Impact: fix an oops which always triggers ide_tape_issue_pc() assumed drive->pc isn't NULL on invocation when checking for back-to-back request sense issues but drive->pc can be NULL and even when it's not NULL, it's not safe to dereference it once the previous command is complete because pc could have been freed or was on stack. Kill back-to-back REQUEST_SENSE detection. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index cb942a9..3a53e08 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -614,12 +614,6 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, { idetape_tape_t *tape = drive->driver_data; - if (drive->pc->c[0] == REQUEST_SENSE && - pc->c[0] == REQUEST_SENSE) { - printk(KERN_ERR "ide-tape: possible ide-tape.c bug - " - "Two request sense in serial were issued\n"); - } - if (drive->failed_pc == NULL && pc->c[0] != REQUEST_SENSE) drive->failed_pc = pc; -- cgit v0.10.2 From 853280a4dc8e3cc97ff10c1c02234d96078f437b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide: use blk_run_queue() instead of blk_start_queueing() blk_start_queueing() is being phased out in favor of [__]blk_run_queue(). Switch. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 310d03f..a914023 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) start_queue = 1; spin_unlock_irq(&hwif->lock); - if (start_queue) { - spin_lock_irq(q->queue_lock); - blk_start_queueing(q); - spin_unlock_irq(q->queue_lock); - } + if (start_queue) + blk_run_queue(q); return; } spin_unlock_irq(&hwif->lock); -- cgit v0.10.2 From 55f3f399422a4a3f6cb84ea4096dfaddf8817399 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide: don't set REQ_SOFTBARRIER ide doesn't have to worry about REQ_SOFTBARRIER. Don't set it. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index a9fbe2c..c243880 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) cmd->protocol = ATA_PROT_NODATA; rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = cmd; } diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index c1c25eb..5991b23 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive) rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; - rq->cmd_flags |= REQ_SOFTBARRIER; if (blk_execute_rq(drive->queue, NULL, rq, 1)) ret = rq->errors; blk_put_request(rq); -- cgit v0.10.2 From 46a802e852c2106d755f697819ea80cd3ffdc222 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide kill unused ide_cmd->special Impact: removal of unused field No one uses ide_cmd->special anymore. Kill it. Signed-off-by: Tejun Heo diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fff..846a1e1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -324,7 +324,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ -- cgit v0.10.2 From 1873b90cdea038715ec7140fccc2116fb930ffb5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide-cd: clear sense buffer before issuing request sense Impact: code simplification ide_cd_request_sense_fixup() clears the tail of the sense buffer if the device didn't completely fill it. This patch makes cdrom_queue_request_sense() clear the sense buffer before issuing the command instead of clearing it afterwards. This simplifies code and eases future changes. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 3aec19d..509f129 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -217,6 +217,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, if (sense == NULL) sense = &info->sense_data; + memset(sense, 0, 18); + /* stuff the sense request in front of our current request */ blk_rq_init(NULL, rq); rq->cmd_type = REQ_TYPE_ATA_PC; @@ -504,14 +506,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) * and some drives don't send them. Sigh. */ if (rq->cmd[0] == GPCMD_REQUEST_SENSE && - cmd->nleft > 0 && cmd->nleft <= 5) { - unsigned int ofs = cmd->nbytes - cmd->nleft; - - while (cmd->nleft > 0) { - *((u8 *)rq->data + ofs++) = 0; - cmd->nleft--; - } - } + cmd->nleft > 0 && cmd->nleft <= 5) + cmd->nleft = 0; } int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, -- cgit v0.10.2 From 7f006dc24fae158131116c9472874f12e16cf040 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-floppy: block pc always uses bio Impact: remove unnecessary code path Block pc requests always use bio and rq->data is always NULL. No need to worry about !rq->bio cases in idefloppy_block_pc_cmd(). Note that ide-atapi uses ide_pio_bytes() for bio PIO transfer which handle sg fine. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 2b4868d..3b22e06 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -216,15 +216,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, ide_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - if (rq->data_len && rq_data_dir(rq) == WRITE) - pc->flags |= PC_FLAG_WRITING; - pc->buf = rq->data; - if (rq->bio) + if (rq->data_len) { pc->flags |= PC_FLAG_DMA_OK; - /* - * possibly problematic, doesn't look like ide-floppy correctly - * handled scattered requests if dma fails... - */ + if (rq_data_dir(rq) == WRITE) + pc->flags |= PC_FLAG_WRITING; + } + /* pio will be performed by ide_pio_bytes() which handles sg fine */ + pc->buf = NULL; pc->req_xfer = pc->buf_size = rq->data_len; } -- cgit v0.10.2 From eace4cb04c0edc9388e987bf9bbdef461f6daca4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-taskfile: don't abuse rq->buffer Impact: rq->buffer usage cleanup ide_raw_taskfile() directly uses rq->buffer to carry pointer to the data buffer. This complicates both block interface and ide backend request handling. Use blk_rq_map_kern() instead and drop special handling for REQ_TYPE_ATA_TASKFILE from ide_map_sg(). Note that REQ_RW setting is moved upwards as blk_rq_map_kern() uses it to initialize bio rw flag. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 35dc38d..9b9e8b1 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -248,10 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) struct scatterlist *sg = hwif->sg_table; struct request *rq = cmd->rq; - if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { - sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE); - cmd->sg_nents = 1; - } else if (!rq->bio) { + if (!rq->bio) { sg_init_one(sg, rq->data, rq->data_len); cmd->sg_nents = 1; } else diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 4aa6223..f400eb4 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->buffer = buf; + + if (cmd->tf_flags & IDE_TFLAG_WRITE) + rq->cmd_flags |= REQ_RW; /* * (ks) We transfer currently only whole sectors. @@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, * if we would find a solution to transfer any size. * To support special commands like READ LONG. */ - rq->hard_nr_sectors = rq->nr_sectors = nsect; - rq->hard_cur_sectors = rq->current_nr_sectors = nsect; - - if (cmd->tf_flags & IDE_TFLAG_WRITE) - rq->cmd_flags |= REQ_RW; + if (nsect) { + error = blk_rq_map_kern(drive->queue, rq, buf, + nsect * SECTOR_SIZE, __GFP_WAIT); + if (error) + goto put_req; + } rq->special = cmd; cmd->rq = rq; error = blk_execute_rq(drive->queue, NULL, rq, 0); - blk_put_request(rq); +put_req: + blk_put_request(rq); return error; } -- cgit v0.10.2 From c267cc1c4db4ccb3406d045a8da8660f0bbfe08d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: don't abuse rq->buffer Impact: rq->buffer usage cleanup ide-atapi uses rq->buffer as private opaque value for internal special requests. rq->special isn't used for these cases (the only case where rq->special is used is for ide-tape rw requests). Use rq->special instead. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 7201b17..2894577 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -90,7 +90,7 @@ static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, blk_rq_init(NULL, rq); rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd_flags |= REQ_PREEMPT; - rq->buffer = (char *)pc; + rq->special = (char *)pc; rq->rq_disk = disk; if (pc->req_xfer) { @@ -119,7 +119,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; - rq->buffer = (char *)pc; + rq->special = (char *)pc; if (pc->req_xfer) { rq->data = pc->buf; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 3b22e06..9460033 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -264,7 +264,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); } else if (blk_special_request(rq)) { - pc = (struct ide_atapi_pc *) rq->buffer; + pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; idefloppy_blockpc_cmd(floppy, pc, rq); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 3a53e08..aadf53c 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -828,7 +828,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, goto out; } if (rq->cmd[13] & REQ_IDETAPE_PC1) { - pc = (struct ide_atapi_pc *) rq->buffer; + pc = (struct ide_atapi_pc *)rq->special; rq->cmd[13] &= ~(REQ_IDETAPE_PC1); rq->cmd[13] |= REQ_IDETAPE_PC2; goto out; -- cgit v0.10.2 From cbfd082abfcbed8c57a12636f36e9bead8d6cfc6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd: don't abuse rq->buffer Impact: rq->buffer usage cleanup ide-cd uses rq->buffer to carry pointer to the original request when issuing REQUEST_SENSE. Use rq->special instead. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 509f129..eb3c299 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -232,8 +232,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, rq->cmd_type = REQ_TYPE_SENSE; rq->cmd_flags |= REQ_PREEMPT; - /* NOTE! Save the failed command in "rq->buffer" */ - rq->buffer = (void *) failed_command; + /* NOTE! Save the failed command in "rq->special" */ + rq->special = (void *)failed_command; if (failed_command) ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x", @@ -247,10 +247,10 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For REQ_TYPE_SENSE, "rq->buffer" points to the original + * For REQ_TYPE_SENSE, "rq->special" points to the original * failed request */ - struct request *failed = (struct request *)rq->buffer; + struct request *failed = (struct request *)rq->special; struct cdrom_info *info = drive->driver_data; void *sense = &info->sense_data; -- cgit v0.10.2 From a1df5169f9bf08f6067029bfb840a05e282b1b97 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide: add helpers for preparing sense requests This is in preparation of removing the queueing of a sense request out of the IRQ handler path. Use struct request_sense as a general sense buffer for all ATAPI devices ide-{floppy,tape,cd}. tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as it can cause deadlock. Converted to use inline struct request and blk_rq_init(). * Added xfer / cdb len selection depending on device type. * All sense prep logics folded into ide_prep_sense() which never fails. * hwif->rq clearing and sense_rq used handling moved into ide_queue_sense_rq(). * blk_rq_map_kern() conversion is moved to later patch. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2894577..c6e0348 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); +void ide_prep_sense(ide_drive_t *drive, struct request *rq) +{ + struct request_sense *sense = &drive->sense_data; + struct request *sense_rq = &drive->sense_rq; + unsigned int cmd_len, sense_len; + + debug_log("%s: enter\n", __func__); + + switch (drive->media) { + case ide_floppy: + cmd_len = 255; + sense_len = 18; + break; + case ide_tape: + cmd_len = 20; + sense_len = 20; + break; + default: + cmd_len = 18; + sense_len = 18; + } + + BUG_ON(sense_len > sizeof(*sense)); + + if (blk_sense_request(rq) || drive->sense_rq_armed) + return; + + memset(sense, 0, sizeof(*sense)); + + blk_rq_init(rq->q, sense_rq); + sense_rq->rq_disk = rq->rq_disk; + + sense_rq->data = sense; + sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; + sense_rq->cmd[4] = cmd_len; + sense_rq->data_len = sense_len; + + sense_rq->cmd_type = REQ_TYPE_SENSE; + sense_rq->cmd_flags |= REQ_PREEMPT; + + if (drive->media == ide_tape) + sense_rq->cmd[13] = REQ_IDETAPE_PC1; + + drive->sense_rq_armed = true; +} +EXPORT_SYMBOL_GPL(ide_prep_sense); + +void ide_queue_sense_rq(ide_drive_t *drive, void *special) +{ + BUG_ON(!drive->sense_rq_armed); + + drive->sense_rq.special = special; + drive->sense_rq_armed = false; + + drive->hwif->rq = NULL; + + elv_add_request(drive->queue, &drive->sense_rq, + ELEVATOR_INSERT_FRONT, 0); +} +EXPORT_SYMBOL_GPL(ide_queue_sense_rq); + /* * Called when an error was detected during the last packet command. * We queue a request sense packet command in the head of the request list. diff --git a/include/linux/ide.h b/include/linux/ide.h index 846a1e1..a69ccac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include #include +/* for request_sense */ +#include + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -602,6 +605,11 @@ struct ide_drive_s { struct ide_atapi_pc request_sense_pc; struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +void ide_queue_sense_rq(ide_drive_t *drive, void *special); + int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -- cgit v0.10.2 From 746d5e43274e9ea6cbd58818afc9239d41fb4e1e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd: convert to using generic sense request Preallocate a sense request in the ->do_request method and reinitialize it only on demand, in case it's been consumed in the IRQ handler path. The reason for this is that we don't want to be mapping rq to bio in the IRQ path and introduce all kinds of unnecessary hacks to the block layer. tj: * Both user and kernel PC requests expect sense data to be stored in separate storage other than drive->sense_data. Copy sense data to rq->sense on completion if rq->sense is not NULL. This fixes bogus sense data on PC requests. As a result, remove cdrom_queue_request_sense. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index eb3c299..7b21c7e 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -206,44 +206,6 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, ide_cd_log_error(drive->name, failed_command, sense); } -static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, - struct request *failed_command) -{ - struct cdrom_info *info = drive->driver_data; - struct request *rq = &drive->request_sense_rq; - - ide_debug_log(IDE_DBG_SENSE, "enter"); - - if (sense == NULL) - sense = &info->sense_data; - - memset(sense, 0, 18); - - /* stuff the sense request in front of our current request */ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_ATA_PC; - rq->rq_disk = info->disk; - - rq->data = sense; - rq->cmd[0] = GPCMD_REQUEST_SENSE; - rq->cmd[4] = 18; - rq->data_len = 18; - - rq->cmd_type = REQ_TYPE_SENSE; - rq->cmd_flags |= REQ_PREEMPT; - - /* NOTE! Save the failed command in "rq->special" */ - rq->special = (void *)failed_command; - - if (failed_command) - ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x", - failed_command->cmd[0]); - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* @@ -251,11 +213,16 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) * failed request */ struct request *failed = (struct request *)rq->special; - struct cdrom_info *info = drive->driver_data; - void *sense = &info->sense_data; + struct request_sense *sense = &drive->sense_data; if (failed) { if (failed->sense) { + /* + * Sense is always read into drive->sense_data. + * Copy back if the failed request has its + * sense pointer set. + */ + memcpy(failed->sense, sense, 18); sense = failed->sense; failed->sense_len = rq->sense_len; } @@ -431,7 +398,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - cdrom_queue_request_sense(drive, NULL, NULL); + ide_queue_sense_rq(drive, NULL); return 1; end_request: @@ -445,7 +412,7 @@ end_request: hwif->rq = NULL; - cdrom_queue_request_sense(drive, rq->sense, rq); + ide_queue_sense_rq(drive, rq); return 1; } else return 2; @@ -893,6 +860,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, goto out_end; } + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index 1d97101..93a3cf1b 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -87,10 +87,6 @@ struct cdrom_info { struct atapi_toc *toc; - /* The result of the last successful request sense command - on this device. */ - struct request_sense sense_data; - u8 max_speed; /* Max speed of the drive. */ u8 current_speed; /* Current speed of the drive. */ -- cgit v0.10.2 From 6b544fcc8cd0a04eb42de9d1ecdd345e979d6ada Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense buffer Since we're issuing REQ_TYPE_SENSE now we need to allow those types of rqs in the ->do_request callbacks. As a future improvement, sense_len assignment might be unified across all ATAPI devices. Borislav to check with specs and test. As a result, get rid of ide_queue_pc_head() and drive->request_sense_rq. tj: * Init request sense ide_atapi_pc from sense request. In the longer timer, it would probably better to fold ide_create_request_sense_cmd() into its only current user - ide_floppy_get_format_progress(). * ide_retry_pc() no longer takes @disk. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index c6e0348..972c522 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -80,34 +80,6 @@ void ide_init_pc(struct ide_atapi_pc *pc) EXPORT_SYMBOL_GPL(ide_init_pc); /* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. - */ -static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, struct request *rq) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->special = (char *)pc; - rq->rq_disk = disk; - - if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; - } - - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - -/* * Add a special packet command request to the tail of the request queue, * and wait for it to be serviced. */ @@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq); /* * Called when an error was detected during the last packet command. - * We queue a request sense packet command in the head of the request list. + * We queue a request sense packet command at the head of the request + * queue. */ -void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) +void ide_retry_pc(ide_drive_t *drive) { - struct request *rq = &drive->request_sense_rq; + struct request *sense_rq = &drive->sense_rq; struct ide_atapi_pc *pc = &drive->request_sense_pc; (void)ide_read_error(drive); - ide_create_request_sense_cmd(drive, pc); + + /* init pc from sense_rq */ + ide_init_pc(pc); + memcpy(pc->c, sense_rq->cmd, 12); + pc->buf = sense_rq->data; + pc->req_xfer = sense_rq->data_len; + if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_pc_head(drive, disk, pc, rq); + + ide_queue_sense_rq(drive, pc); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("[cmd %x]: check condition\n", rq->cmd[0]); /* Retry operation */ - ide_retry_pc(drive, rq->rq_disk); + ide_retry_pc(drive); /* queued, but not started */ return ide_stopped; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 9460033..d3302cc 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); - } else if (blk_special_request(rq)) { + } else if (blk_special_request(rq) || blk_sense_request(rq)) { pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; @@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, goto out_end; } + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index aadf53c..8324dfa 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) printk(KERN_ERR "ide-tape: %s: I/O error, ", tape->name); /* Retry operation */ - ide_retry_pc(drive, tape->disk); + ide_retry_pc(drive); return ide_stopped; } pc->error = 0; @@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); - if (!blk_special_request(rq)) { + if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " "request queue (%d)\n", drive->name, rq->cmd_type); @@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, BUG(); out: + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/include/linux/ide.h b/include/linux/ide.h index a69ccac..9e67cca 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -604,7 +604,6 @@ struct ide_drive_s { unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; /* current sense rq and buffer */ bool sense_rq_armed; @@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); void ide_queue_sense_rq(ide_drive_t *drive, void *special); -- cgit v0.10.2 From 5c4be57249e2e09136446597d2fe2a967c6ffef0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd,atapi: use bio for internal commands Impact: unify request data buffer handling rq->data is used mostly to pass kernel buffer through request queue without using bio. There are only a couple of places which still do this in kernel and converting to bio isn't difficult. This patch converts ide-cd and atapi to use bio instead of rq->data for request sense and internal pc commands. With previous change to unify sense request handling, this is relatively easily achieved by adding blk_rq_map_kern() during sense_rq prep and PC issue. If blk_rq_map_kern() fails for sense, the error is deferred till sense issue and aborts the failed command which triggered the sense. Note that this is a slim possibility as sense prep is done on each command issue, so for the above condition to actually trigger, all preps since the last sense issue till the issue of the request which would require a sense should fail. * do_request functions might sleep now. This should be okay as ide request_fn - do_ide_request() - is invoked only from make_request and plug work. Make sure this is the case by adding might_sleep() to do_ide_request(). * Functions which access the read sense data before the sense request is complete now should access bio_data(sense_rq->bio) as the sense buffer might have been copied during blk_rq_map_kern(). * ide-tape updated to map sg. * cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC special case. Simplified. * tp_ops->output/input_data path dropped from ide_pc_intr(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 972c522..5cefe12 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq->special = (char *)pc; if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; + error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer, + GFP_NOIO); + if (error) + goto put_req; } memcpy(rq->cmd, pc->c, 12); if (drive->media == ide_tape) rq->cmd[13] = REQ_IDETAPE_PC1; error = blk_execute_rq(drive->queue, disk, rq, 0); +put_req: blk_put_request(rq); - return error; } EXPORT_SYMBOL_GPL(ide_queue_pc_tail); @@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) struct request_sense *sense = &drive->sense_data; struct request *sense_rq = &drive->sense_rq; unsigned int cmd_len, sense_len; + int err; debug_log("%s: enter\n", __func__); @@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) memset(sense, 0, sizeof(*sense)); blk_rq_init(rq->q, sense_rq); - sense_rq->rq_disk = rq->rq_disk; - sense_rq->data = sense; + err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, + GFP_NOIO); + if (unlikely(err)) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: failed to map sense buffer\n", + drive->name); + return; + } + + sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; sense_rq->cmd[4] = cmd_len; - sense_rq->data_len = sense_len; - sense_rq->cmd_type = REQ_TYPE_SENSE; sense_rq->cmd_flags |= REQ_PREEMPT; @@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) } EXPORT_SYMBOL_GPL(ide_prep_sense); -void ide_queue_sense_rq(ide_drive_t *drive, void *special) +int ide_queue_sense_rq(ide_drive_t *drive, void *special) { - BUG_ON(!drive->sense_rq_armed); + /* deferred failure from ide_prep_sense() */ + if (!drive->sense_rq_armed) { + printk(KERN_WARNING "%s: failed queue sense request\n", + drive->name); + return -ENOMEM; + } drive->sense_rq.special = special; drive->sense_rq_armed = false; @@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special) elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT, 0); + return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive) /* init pc from sense_rq */ ide_init_pc(pc); memcpy(pc->c, sense_rq->cmd, 12); - pc->buf = sense_rq->data; + pc->buf = bio_data(sense_rq->bio); /* pointer to mapped address */ pc->req_xfer = sense_rq->data_len; if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_sense_rq(drive, pc); + if (ide_queue_sense_rq(drive, pc)) + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xferfunc; unsigned int timeout, done; u16 bcount; u8 stat, ireason, dsc = 0; @@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape) + if (drive->media == ide_tape && !rq->bio) done = ide_rq_bytes(rq); /* FIXME */ else done = blk_rq_bytes(rq); @@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - xferfunc = write ? tp_ops->output_data : tp_ops->input_data; - - if (drive->media == ide_floppy && pc->buf == NULL) { + if (drive->media == ide_tape && pc->bh) + done = drive->pc_io_buffers(drive, pc, bcount, write); + else { done = min_t(unsigned int, bcount, cmd->nleft); ide_pio_bytes(drive, cmd, write, done); - } else if (drive->media == ide_tape && pc->bh) { - done = drive->pc_io_buffers(drive, pc, bcount, write); - } else { - done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); - xferfunc(drive, NULL, pc->cur_pos, done); } /* Update the current position */ diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 7b21c7e..392a5bd 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* * For REQ_TYPE_SENSE, "rq->special" points to the original - * failed request + * failed request. Also, the sense data should be read + * directly from rq which might be different from the original + * sense buffer if it got copied during mapping. */ struct request *failed = (struct request *)rq->special; - struct request_sense *sense = &drive->sense_data; + void *sense = bio_data(rq->bio); if (failed) { if (failed->sense) { @@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - ide_queue_sense_rq(drive, NULL); + return ide_queue_sense_rq(drive, NULL) ? 2 : 1; return 1; end_request: @@ -412,8 +414,7 @@ end_request: hwif->rq = NULL; - ide_queue_sense_rq(drive, rq); - return 1; + return ide_queue_sense_rq(drive, rq) ? 2 : 1; } else return 2; } @@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, rq->cmd_flags |= cmd_flags; rq->timeout = timeout; if (buffer) { - rq->data = buffer; - rq->data_len = *bufflen; + error = blk_rq_map_kern(drive->queue, rq, buffer, + *bufflen, GFP_NOIO); + if (error) { + blk_put_request(rq); + return error; + } } error = blk_execute_rq(drive->queue, info->disk, rq, 0); @@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) drive->dma = 0; /* sg request */ - if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + if (rq->bio) { struct request_queue *q = drive->queue; + char *buf = bio_data(rq->bio); unsigned int alignment; - char *buf; - - if (rq->bio) - buf = bio_data(rq->bio); - else - buf = rq->data; drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 9b9e8b1..3245c2d 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q) spin_unlock_irq(q->queue_lock); + /* HLD do_request() callback might sleep, make sure it's okay */ + might_sleep(); + if (ide_lock_host(host, hwif)) goto plug_device_2; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 8324dfa..9b762a2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -850,6 +850,9 @@ out: cmd.rq = rq; + ide_init_sg_cmd(&cmd, pc->req_xfer); + ide_map_sg(drive, &cmd); + return ide_tape_issue_pc(drive, &cmd, pc); } diff --git a/include/linux/ide.h b/include/linux/ide.h index 9e67cca..1957461 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); -void ide_queue_sense_rq(ide_drive_t *drive, void *special); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); -- cgit v0.10.2 From fc38b521dcffcb07447cd98fedc56f495c10b90d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:43 +0900 Subject: ide-pm: don't abuse rq->data Impact: cleanup rq->data usage ide-pm uses rq->data to carry pointer to struct request_pm_state through request queue and rq->special is used to carray pointer to local struct ide_cmd, which isn't necessary. Use rq->special for request_pm_state instead and use local ide_cmd in ide_start_power_step(). Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 3245c2d..6e3094e 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -368,7 +368,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 0d8a151..ba1488b 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int ret; /* call ACPI _GTM only once */ @@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_acpi_get_timing(hwif); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_SUSPEND; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int err; /* call ACPI _PS0 / _STM only once */ @@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev) ide_acpi_exec_tfs(drive); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_RESUME; rq->cmd_flags |= REQ_PREEMPT; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; @@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; - struct ide_cmd *cmd = rq->special; - - memset(cmd, 0, sizeof(*cmd)); + struct request_pm_state *pm = rq->special; + struct ide_cmd cmd = { }; switch (pm->pm_step) { case IDE_PM_FLUSH_CACHE: /* Suspend step 1 (flush cache) */ @@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; } if (ata_id_flush_ext_enabled(drive->id)) - cmd->tf.command = ATA_CMD_FLUSH_EXT; + cmd.tf.command = ATA_CMD_FLUSH_EXT; else - cmd->tf.command = ATA_CMD_FLUSH; + cmd.tf.command = ATA_CMD_FLUSH; goto out_do_tf; case IDE_PM_STANDBY: /* Suspend step 2 (standby) */ - cmd->tf.command = ATA_CMD_STANDBYNOW1; + cmd.tf.command = ATA_CMD_STANDBYNOW1; goto out_do_tf; case IDE_PM_RESTORE_PIO: /* Resume step 1 (restore PIO) */ ide_set_max_pio(drive); @@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) ide_complete_power_step(drive, rq); return ide_stopped; case IDE_PM_IDLE: /* Resume step 2 (idle) */ - cmd->tf.command = ATA_CMD_IDLEIMMEDIATE; + cmd.tf.command = ATA_CMD_IDLEIMMEDIATE; goto out_do_tf; case IDE_PM_RESTORE_DMA: /* Resume step 3 (restore DMA) */ /* @@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; out_do_tf: - cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; - cmd->valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; - cmd->protocol = ATA_PROT_NODATA; + cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; + cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; + cmd.protocol = ATA_PROT_NODATA; - return do_rw_taskfile(drive, cmd); + return do_rw_taskfile(drive, &cmd); } /** @@ -181,7 +173,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; unsigned long flags; ide_complete_power_step(drive, rq); @@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; if (blk_pm_suspend_request(rq) && pm->pm_step == IDE_PM_START_SUSPEND) -- cgit v0.10.2 From ea7066afcd590e4663e6dc010f93704164050f48 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape,floppy: fix failed command completion after request sense Impact: fix infinite retry loop After a command failed, ide-tape and floppy inserts REQUEST_SENSE in front of the failed command and according to the result, sets pc->retries, flags and errors. After REQUEST_SENSE is complete, the failed command is again at the front of the queue and if the verdict was to terminate the request, the issue functions tries to complete it directly by calling drive->pc_callback() and returning ide_stopped. However, drive->pc_callback() doesn't complete a request. It only prepares for completion of the request. As a result, this creates an infinite loop where the failed request is retried perpetually. Fix it by actually ending the request by calling ide_complete_rq(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index d3302cc..d20704a 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -141,6 +141,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive, drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); return ide_stopped; } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 9b762a2..2b9a136 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -643,6 +643,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, } drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); return ide_stopped; } debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); -- cgit v0.10.2 From b3071d190d6757b14af002a9d79832f12de61bce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-atapi,tape,floppy: allow ->pc_callback() to change rq->data_len Impact: allow residual count implementation in ->pc_callback() rq->data_len has two duties - carrying the number of input bytes on issue and carrying residual count back to the issuer on completion. ide-atapi completion callback ->pc_callback() is the right place to do this but currently ide-atapi depends on rq->data_len carrying the original request size after calling ->pc_callback() to complete the pc request. This patch makes ide_pc_intr(), ide_tape_issue_pc() and ide_floppy_issue_pc() cache length to complete before calling ->pc_callback() so that it can modify rq->data_len as necessary. Note: As using rq->data_len for two purposes can make cases like this incorrect in subtle ways, future changes will introduce separate field for residual count. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 5cefe12..3df5442 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -409,6 +409,16 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0) dsc = 1; + /* + * ->pc_callback() might change rq->data_len for + * residual count, cache total length. + */ + if (!blk_special_request(rq) && + (drive->media == ide_tape && !rq->bio)) + done = ide_rq_bytes(rq); /* FIXME */ + else + done = blk_rq_bytes(rq); + /* Command finished - Call the callback function */ uptodate = drive->pc_callback(drive, dsc); @@ -417,7 +427,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (blk_special_request(rq)) { rq->errors = 0; - done = blk_rq_bytes(rq); error = 0; } else { @@ -426,11 +435,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape && !rq->bio) - done = ide_rq_bytes(rq); /* FIXME */ - else - done = blk_rq_bytes(rq); - error = uptodate ? 0 : -EIO; } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index d20704a..537b7c5 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -134,14 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive, drive->pc = pc; if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR)) ide_floppy_report_error(floppy, pc); + /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; drive->failed_pc = NULL; drive->pc_callback(drive, 0); - ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2b9a136..8226d52 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -622,6 +622,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, if (pc->retries > IDETAPE_MAX_PC_RETRIES || (pc->flags & PC_FLAG_ABORT)) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + /* * We will "abort" retrying a packet command in case legitimate * error code was received (crossing a filemark, or end of the @@ -641,9 +643,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; } + drive->failed_pc = NULL; drive->pc_callback(drive, 0); - ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); -- cgit v0.10.2 From 35ab8d3251833e4052aa64b09b08195e949518c7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: use single continuous buffer Impact: simpler buffer allocation and handling, kills OOM, fix DMA transfers ide-tape has its own multiple buffer mechanism using struct idetape_bh. It allocates buffer with decreasing order-of-two allocations so that it results in minimum number of segments. However, the implementation is quite complex and works in a way that no other block or ide driver works necessitating a lot of special case handling. The benefit this complex allocation scheme brings is questionable as PIO or DMA the number of segments (16 maximum) doesn't make any noticeable difference and it also doesn't negate the need for multiple order allocation which can fail under memory pressure or high fragmentation although it does lower the highest order necessary by one when the buffer size isn't power of two. As the first step to remove the custom buffer management, this patch makes ide-tape allocate single continous buffer. The maximum order is four. I doubt the change would cause any trouble but if it ever matters, it should be converted to regular sg mechanism like everyone else and even in that case dropping custom buffer handling and moving to standard mechanism first make sense as an intermediate step. This patch makes the first bh to contain the whole buffer and drops multi bh handling code. Following patches will make further changes. This patch has the side effect of killing OOM triggered by allocation path and fixing DMA transfers. Previously, bug in alloc path triggered OOM on command issue and commands were passed to DMA engine without DMA-mapping all the segments. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 8226d52..b2afbc7 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -134,7 +134,6 @@ enum { struct idetape_bh { u32 b_size; atomic_t b_count; - struct idetape_bh *b_reqnext; char *b_data; }; @@ -228,10 +227,6 @@ typedef struct ide_tape_obj { char *b_data; int b_count; - int pages_per_buffer; - /* Wasted space in each stage */ - int excess_bh_size; - /* Measures average tape speed */ unsigned long avg_time; int avg_size; @@ -303,9 +298,7 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, struct idetape_bh *bh = pc->bh; int count; - while (bcount) { - if (bh == NULL) - break; + if (bcount && bh) { count = min( (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), bcount); @@ -313,15 +306,10 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, atomic_read(&bh->b_count), count); bcount -= count; atomic_add(count, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } + if (atomic_read(&bh->b_count) == bh->b_size) + pc->bh = NULL; } - pc->bh = bh; - return bcount; } @@ -331,22 +319,14 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, struct idetape_bh *bh = pc->bh; int count; - while (bcount) { - if (bh == NULL) - break; + if (bcount && bh) { count = min((unsigned int)pc->b_count, (unsigned int)bcount); drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count); bcount -= count; pc->b_data += count; pc->b_count -= count; - if (!pc->b_count) { - bh = bh->b_reqnext; - pc->bh = bh; - if (bh) { - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); - } - } + if (!pc->b_count) + pc->bh = NULL; } return bcount; @@ -355,24 +335,20 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) { struct idetape_bh *bh = pc->bh; - int count; unsigned int bcount = pc->xferred; if (pc->flags & PC_FLAG_WRITING) return; - while (bcount) { - if (bh == NULL) { + if (bcount) { + if (bh == NULL || bcount > bh->b_size) { printk(KERN_ERR "ide-tape: bh == NULL in %s\n", __func__); return; } - count = min((unsigned int)bh->b_size, (unsigned int)bcount); - atomic_set(&bh->b_count, count); + atomic_set(&bh->b_count, bcount); if (atomic_read(&bh->b_count) == bh->b_size) - bh = bh->b_reqnext; - bcount -= count; + pc->bh = NULL; } - pc->bh = bh; } /* @@ -439,24 +415,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) /* Free data buffers completely. */ static void ide_tape_kfree_buffer(idetape_tape_t *tape) { - struct idetape_bh *prev_bh, *bh = tape->merge_bh; - - while (bh) { - u32 size = bh->b_size; - - while (size) { - unsigned int order = fls(size >> PAGE_SHIFT)-1; - - if (bh->b_data) - free_pages((unsigned long)bh->b_data, order); + struct idetape_bh *bh = tape->merge_bh; - size &= (order-1); - bh->b_data += (1 << order) * PAGE_SIZE; - } - prev_bh = bh; - bh = bh->b_reqnext; - kfree(prev_bh); - } + kfree(bh->b_data); + kfree(bh); } static void ide_tape_handle_dsc(ide_drive_t *); @@ -861,117 +823,50 @@ out: } /* - * The function below uses __get_free_pages to allocate a data buffer of size - * tape->buffer_size (or a bit more). We attempt to combine sequential pages as - * much as possible. - * - * It returns a pointer to the newly allocated buffer, or NULL in case of - * failure. + * It returns a pointer to the newly allocated buffer, or NULL in case + * of failure. */ static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape, - int full, int clear) -{ - struct idetape_bh *prev_bh, *bh, *merge_bh; - int pages = tape->pages_per_buffer; - unsigned int order, b_allocd; - char *b_data = NULL; - - merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - bh = merge_bh; - if (bh == NULL) - goto abort; - - order = fls(pages) - 1; - bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!bh->b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - pages &= (order-1); - - if (clear) - memset(bh->b_data, 0, b_allocd); - bh->b_reqnext = NULL; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); + int full) +{ + struct idetape_bh *bh; - while (pages) { - order = fls(pages) - 1; - b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - - if (clear) - memset(b_data, 0, b_allocd); - - /* newly allocated page frames below buffer header or ...*/ - if (bh->b_data == b_data + b_allocd) { - bh->b_size += b_allocd; - bh->b_data -= b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - /* they are above the header */ - if (b_data == bh->b_data + bh->b_size) { - bh->b_size += b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - prev_bh = bh; - bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - if (!bh) { - free_pages((unsigned long) b_data, order); - goto abort; - } - bh->b_reqnext = NULL; - bh->b_data = b_data; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - prev_bh->b_reqnext = bh; + bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); + if (!bh) + return NULL; - pages &= (order-1); + bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!bh->b_data) { + kfree(bh); + return NULL; } - bh->b_size -= tape->excess_bh_size; - if (full) - atomic_sub(tape->excess_bh_size, &bh->b_count); - return merge_bh; -abort: - ide_tape_kfree_buffer(tape); - return NULL; + bh->b_size = tape->buffer_size; + atomic_set(&bh->b_count, full ? bh->b_size : 0); + + return bh; } static int idetape_copy_stage_from_user(idetape_tape_t *tape, const char __user *buf, int n) { struct idetape_bh *bh = tape->bh; - int count; int ret = 0; - while (n) { - if (bh == NULL) { + if (n) { + if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) { printk(KERN_ERR "ide-tape: bh == NULL in %s\n", __func__); return 1; } - count = min((unsigned int) - (bh->b_size - atomic_read(&bh->b_count)), - (unsigned int)n); if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf, - count)) + n)) ret = 1; - n -= count; - atomic_add(count, &bh->b_count); - buf += count; - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } + atomic_add(n, &bh->b_count); + if (atomic_read(&bh->b_count) == bh->b_size) + tape->bh = NULL; } - tape->bh = bh; + return ret; } @@ -979,30 +874,20 @@ static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf, int n) { struct idetape_bh *bh = tape->bh; - int count; int ret = 0; - while (n) { - if (bh == NULL) { + if (n) { + if (bh == NULL || n > tape->b_count) { printk(KERN_ERR "ide-tape: bh == NULL in %s\n", __func__); return 1; } - count = min(tape->b_count, n); - if (copy_to_user(buf, tape->b_data, count)) + if (copy_to_user(buf, tape->b_data, n)) ret = 1; - n -= count; - tape->b_data += count; - tape->b_count -= count; - buf += count; - if (!tape->b_count) { - bh = bh->b_reqnext; - tape->bh = bh; - if (bh) { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } - } + tape->b_data += n; + tape->b_count -= n; + if (!tape->b_count) + tape->bh = NULL; } return ret; } @@ -1254,7 +1139,7 @@ static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - int blocks, min; + int blocks; struct idetape_bh *bh; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { @@ -1269,31 +1154,16 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) if (tape->merge_bh_size) { blocks = tape->merge_bh_size / tape->blk_size; if (tape->merge_bh_size % tape->blk_size) { - unsigned int i; - + unsigned int i = tape->blk_size - + tape->merge_bh_size % tape->blk_size; blocks++; - i = tape->blk_size - tape->merge_bh_size % - tape->blk_size; - bh = tape->bh->b_reqnext; - while (bh) { - atomic_set(&bh->b_count, 0); - bh = bh->b_reqnext; - } bh = tape->bh; - while (i) { - if (bh == NULL) { - printk(KERN_INFO "ide-tape: bug," - " bh NULL\n"); - break; - } - min = min(i, (unsigned int)(bh->b_size - - atomic_read(&bh->b_count))); + if (bh) { memset(bh->b_data + atomic_read(&bh->b_count), - 0, min); - atomic_add(min, &bh->b_count); - i -= min; - bh = bh->b_reqnext; - } + 0, i); + atomic_add(i, &bh->b_count); + } else + printk(KERN_INFO "ide-tape: bug, bh NULL\n"); } (void) idetape_add_chrdev_write_request(drive, blocks); tape->merge_bh_size = 0; @@ -1321,7 +1191,7 @@ static int idetape_init_read(ide_drive_t *drive) " 0 now\n"); tape->merge_bh_size = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); + tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); if (!tape->merge_bh) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_READ; @@ -1368,23 +1238,18 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; - struct idetape_bh *bh; + struct idetape_bh *bh = tape->merge_bh; int blocks; while (bcount) { unsigned int count; - bh = tape->merge_bh; count = min(tape->buffer_size, bcount); bcount -= count; blocks = count / tape->blk_size; - while (count) { - atomic_set(&bh->b_count, - min(count, (unsigned int)bh->b_size)); - memset(bh->b_data, 0, atomic_read(&bh->b_count)); - count -= atomic_read(&bh->b_count); - bh = bh->b_reqnext; - } + atomic_set(&bh->b_count, count); + memset(bh->b_data, 0, atomic_read(&bh->b_count)); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks, tape->merge_bh); } @@ -1596,7 +1461,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, "should be 0 now\n"); tape->merge_bh_size = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); + tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); if (!tape->merge_bh) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_WRITE; @@ -1970,7 +1835,7 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor) idetape_tape_t *tape = drive->driver_data; ide_tape_flush_merge_buffer(drive); - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0); + tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1); if (tape->merge_bh != NULL) { idetape_pad_zeros(drive, tape->blk_size * (tape->user_bs_factor - 1)); @@ -2201,11 +2066,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) tape->buffer_size = *ctl * tape->blk_size; } buffer_size = tape->buffer_size; - tape->pages_per_buffer = buffer_size / PAGE_SIZE; - if (buffer_size % PAGE_SIZE) { - tape->pages_per_buffer++; - tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE; - } /* select the "best" DSC read/write polling freq */ speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]); -- cgit v0.10.2 From 21d9c5d227593d15630ae83a336d1519653e9b8a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: use standard data transfer mechanism Impact: use standard way to transfer data ide-tape uses rq in an interesting way. For r/w requests, rq->special is used to carry a private buffer management structure idetape_bh and rq->nr_sectors and current_nr_sectors are initialized to the number of idetape blocks which isn't necessary 512 bytes. Also, rq->current_nr_sectors is used to report back the residual count in units of idetape blocks. This peculiarity taxes both block layer and ide. ide-atapi has different paths and hooks to accomodate it and what a rq means becomes quite confusing and making changes at the block layer becomes quite difficult and error-prone. This patch makes ide-tape use bio instead. With the previous patch, ide-tape currently is using single contiguos buffer so replacing it isn't difficult. Data buffer is mapped into bio using blk_rq_map_kern() in idetape_queue_rw_tail(). idetape_io_buffers() and idetape_update_buffers() are dropped and pc->bh is set to null to tell ide-atapi to use standard data transfer mechanism and idetape_bh byte counts are updated by the issuer on completion using the residual count. This change also nicely removes the FIXME in ide_pc_intr() where ide-tape rqs need to be completed using ide_rq_bytes() instead of blk_rq_bytes() (although this didn't really matter as the request didn't have bio). Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 3df5442..b9dd450 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -413,11 +413,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) * ->pc_callback() might change rq->data_len for * residual count, cache total length. */ - if (!blk_special_request(rq) && - (drive->media == ide_tape && !rq->bio)) - done = ide_rq_bytes(rq); /* FIXME */ - else - done = blk_rq_bytes(rq); + done = blk_rq_bytes(rq); /* Command finished - Call the callback function */ uptodate = drive->pc_callback(drive, dsc); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index b2afbc7..b373ac8 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -292,65 +292,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) return tape; } -static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - if (bcount && bh) { - count = min( - (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), - bcount); - drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data + - atomic_read(&bh->b_count), count); - bcount -= count; - atomic_add(count, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) - pc->bh = NULL; - } - - return bcount; -} - -static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - if (bcount && bh) { - count = min((unsigned int)pc->b_count, (unsigned int)bcount); - drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count); - bcount -= count; - pc->b_data += count; - pc->b_count -= count; - if (!pc->b_count) - pc->bh = NULL; - } - - return bcount; -} - -static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - struct idetape_bh *bh = pc->bh; - unsigned int bcount = pc->xferred; - - if (pc->flags & PC_FLAG_WRITING) - return; - if (bcount) { - if (bh == NULL || bcount > bh->b_size) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return; - } - atomic_set(&bh->b_count, bcount); - if (atomic_read(&bh->b_count) == bh->b_size) - pc->bh = NULL; - } -} - /* * called on each failed packet command retry to analyze the request sense. We * currently do not utilize this information. @@ -368,12 +309,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) pc->c[0], tape->sense_key, tape->asc, tape->ascq); /* Correct pc->xferred by asking the tape. */ - if (pc->flags & PC_FLAG_DMA_ERROR) { + if (pc->flags & PC_FLAG_DMA_ERROR) pc->xferred = pc->req_xfer - tape->blk_size * get_unaligned_be32(&sense[3]); - idetape_update_buffers(drive, pc); - } /* * If error was the result of a zero-length read or write command, @@ -458,7 +397,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) } tape->first_frame += blocks; - rq->current_nr_sectors -= blocks; + rq->data_len -= blocks * tape->blk_size; if (pc->error) { uptodate = 0; @@ -520,19 +459,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive) idetape_postpone_request(drive); } -static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - unsigned int bleft; - - if (write) - bleft = idetape_output_buffers(drive, pc, bcount); - else - bleft = idetape_input_buffers(drive, pc, bcount); - - return bcount - bleft; -} - /* * Packet Command Interface * @@ -683,7 +609,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = bh; + pc->bh = NULL; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; @@ -1063,10 +989,12 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, struct idetape_bh *bh) { idetape_tape_t *tape = drive->driver_data; + size_t size = blocks * tape->blk_size; struct request *rq; - int ret, errors; + int ret; debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); + BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; @@ -1074,21 +1002,29 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, rq->rq_disk = tape->disk; rq->special = (void *)bh; rq->sector = tape->first_frame; - rq->nr_sectors = blocks; - rq->current_nr_sectors = blocks; + + if (size) { + ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size, + __GFP_WAIT); + if (ret) + goto out_put; + } + blk_execute_rq(drive->queue, tape->disk, rq, 0); - errors = rq->errors; - ret = tape->blk_size * (blocks - rq->current_nr_sectors); - blk_put_request(rq); + /* calculate the number of transferred bytes and update bh */ + size -= rq->data_len; + if (cmd == REQ_IDETAPE_READ) + atomic_add(size, &bh->b_count); - if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0) - return 0; + ret = size; + if (rq->errors == IDE_DRV_ERROR_GENERAL) + ret = -EIO; if (tape->merge_bh) idetape_init_merge_buffer(tape); - if (errors == IDE_DRV_ERROR_GENERAL) - return -EIO; +out_put: + blk_put_request(rq); return ret; } @@ -2034,8 +1970,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) u16 *ctl = (u16 *)&tape->caps[12]; drive->pc_callback = ide_tape_callback; - drive->pc_update_buffers = idetape_update_buffers; - drive->pc_io_buffers = ide_tape_io_buffers; drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP; -- cgit v0.10.2 From 963da55c4b9eeeb2085ca74ba927cf77bce966d4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: kill idetape_bh Impact: kill now unnecessary idetape_bh With everything using standard mechanisms, there is no need for idetape_bh anymore. Kill it and use tape->buf, cur and valid to describe data buffer instead. Changes worth mentioning are... * idetape_queue_rq_tail() now always queue tape->buf and and adjusts buffer state properly before completion. * idetape_pad_zeros() clears the buffer only once. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index b373ac8..d2e9c09 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -131,12 +131,6 @@ enum { IDETAPE_DIR_WRITE = (1 << 2), }; -struct idetape_bh { - u32 b_size; - atomic_t b_count; - char *b_data; -}; - /* Tape door status */ #define DOOR_UNLOCKED 0 #define DOOR_LOCKED 1 @@ -218,14 +212,12 @@ typedef struct ide_tape_obj { /* Data buffer size chosen based on the tape's recommendation */ int buffer_size; - /* merge buffer */ - struct idetape_bh *merge_bh; - /* size of the merge buffer */ - int merge_bh_size; - /* pointer to current buffer head within the merge buffer */ - struct idetape_bh *bh; - char *b_data; - int b_count; + /* Staging buffer of buffer_size bytes */ + void *buf; + /* The read/write cursor */ + void *cur; + /* The number of valid bytes in buf */ + size_t valid; /* Measures average tape speed */ unsigned long avg_time; @@ -351,15 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) } } -/* Free data buffers completely. */ -static void ide_tape_kfree_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *bh = tape->merge_bh; - - kfree(bh->b_data); - kfree(bh); -} - static void ide_tape_handle_dsc(ide_drive_t *); static int ide_tape_callback(ide_drive_t *drive, int dsc) @@ -603,8 +586,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, struct ide_atapi_pc *pc, struct request *rq, u8 opcode) { - struct idetape_bh *bh = (struct idetape_bh *)rq->special; - unsigned int length = rq->current_nr_sectors; + unsigned int length = rq->nr_sectors; ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); @@ -616,14 +598,11 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, if (pc->req_xfer == tape->buffer_size) pc->flags |= PC_FLAG_DMA_OK; - if (opcode == READ_6) { + if (opcode == READ_6) pc->c[0] = READ_6; - atomic_set(&bh->b_count, 0); - } else if (opcode == WRITE_6) { + else if (opcode == WRITE_6) { pc->c[0] = WRITE_6; pc->flags |= PC_FLAG_WRITING; - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); } memcpy(rq->cmd, pc->c, 12); @@ -639,10 +618,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, struct ide_cmd cmd; u8 stat; - debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu," - " current_nr_sectors: %u\n", - (unsigned long long)rq->sector, rq->nr_sectors, - rq->current_nr_sectors); + debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n" + (unsigned long long)rq->sector, rq->nr_sectors); if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ @@ -749,89 +726,6 @@ out: } /* - * It returns a pointer to the newly allocated buffer, or NULL in case - * of failure. - */ -static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape, - int full) -{ - struct idetape_bh *bh; - - bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - if (!bh) - return NULL; - - bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL); - if (!bh->b_data) { - kfree(bh); - return NULL; - } - - bh->b_size = tape->buffer_size; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - - return bh; -} - -static int idetape_copy_stage_from_user(idetape_tape_t *tape, - const char __user *buf, int n) -{ - struct idetape_bh *bh = tape->bh; - int ret = 0; - - if (n) { - if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf, - n)) - ret = 1; - atomic_add(n, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) - tape->bh = NULL; - } - - return ret; -} - -static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf, - int n) -{ - struct idetape_bh *bh = tape->bh; - int ret = 0; - - if (n) { - if (bh == NULL || n > tape->b_count) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - if (copy_to_user(buf, tape->b_data, n)) - ret = 1; - tape->b_data += n; - tape->b_count -= n; - if (!tape->b_count) - tape->bh = NULL; - } - return ret; -} - -static void idetape_init_merge_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *bh = tape->merge_bh; - tape->bh = tape->merge_bh; - - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) - atomic_set(&bh->b_count, 0); - else { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } -} - -/* * Write a filemark if write_filemark=1. Flush the device buffers without * writing a filemark otherwise. */ @@ -928,10 +822,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive) return; clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags); - tape->merge_bh_size = 0; - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + tape->valid = 0; + if (tape->buf != NULL) { + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; @@ -985,8 +879,7 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive, * Generate a read/write request for the block device interface and wait for it * to be serviced. */ -static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, - struct idetape_bh *bh) +static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks) { idetape_tape_t *tape = drive->driver_data; size_t size = blocks * tape->blk_size; @@ -1000,11 +893,10 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[13] = cmd; rq->rq_disk = tape->disk; - rq->special = (void *)bh; rq->sector = tape->first_frame; if (size) { - ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size, + ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, __GFP_WAIT); if (ret) goto out_put; @@ -1012,17 +904,17 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, blk_execute_rq(drive->queue, tape->disk, rq, 0); - /* calculate the number of transferred bytes and update bh */ + /* calculate the number of transferred bytes and update buffer state */ size -= rq->data_len; + tape->cur = tape->buf; if (cmd == REQ_IDETAPE_READ) - atomic_add(size, &bh->b_count); + tape->valid = size; + else + tape->valid = 0; ret = size; if (rq->errors == IDE_DRV_ERROR_GENERAL) ret = -EIO; - - if (tape->merge_bh) - idetape_init_merge_buffer(tape); out_put: blk_put_request(rq); return ret; @@ -1064,49 +956,33 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) /* Queue up a character device originated write request. */ static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) { - idetape_tape_t *tape = drive->driver_data; - debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, - blocks, tape->merge_bh); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks); } static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; int blocks; - struct idetape_bh *bh; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer" " but we are not writing.\n"); return; } - if (tape->merge_bh_size > tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n"); - tape->merge_bh_size = tape->buffer_size; - } - if (tape->merge_bh_size) { - blocks = tape->merge_bh_size / tape->blk_size; - if (tape->merge_bh_size % tape->blk_size) { - unsigned int i = tape->blk_size - - tape->merge_bh_size % tape->blk_size; + if (tape->buf) { + blocks = tape->valid / tape->blk_size; + if (tape->valid % tape->blk_size) { blocks++; - bh = tape->bh; - if (bh) { - memset(bh->b_data + atomic_read(&bh->b_count), - 0, i); - atomic_add(i, &bh->b_count); - } else - printk(KERN_INFO "ide-tape: bug, bh NULL\n"); + memset(tape->buf + tape->valid, 0, + tape->blk_size - tape->valid % tape->blk_size); } (void) idetape_add_chrdev_write_request(drive, blocks); - tape->merge_bh_size = 0; } - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + if (tape->buf != NULL) { + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; } @@ -1122,15 +998,15 @@ static int idetape_init_read(ide_drive_t *drive) ide_tape_flush_merge_buffer(drive); idetape_flush_tape_buffers(drive); } - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size should be" - " 0 now\n"); - tape->merge_bh_size = 0; + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); - if (!tape->merge_bh) + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_READ; + tape->cur = tape->buf; /* * Issue a read 0 command to ensure that DSC handshake is @@ -1140,11 +1016,10 @@ static int idetape_init_read(ide_drive_t *drive) */ if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { bytes_read = idetape_queue_rw_tail(drive, - REQ_IDETAPE_READ, 0, - tape->merge_bh); + REQ_IDETAPE_READ, 0); if (bytes_read < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; tape->chrdev_dir = IDETAPE_DIR_NONE; return bytes_read; } @@ -1157,8 +1032,6 @@ static int idetape_init_read(ide_drive_t *drive) /* called from idetape_chrdev_read() to service a chrdev read request. */ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) { - idetape_tape_t *tape = drive->driver_data; - debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks); /* If we are at a filemark, return a read length of 0 */ @@ -1167,27 +1040,21 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) idetape_init_read(drive); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks, - tape->merge_bh); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks); } static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; - struct idetape_bh *bh = tape->merge_bh; - int blocks; + + memset(tape->buf, 0, tape->buffer_size); while (bcount) { - unsigned int count; + unsigned int count = min(tape->buffer_size, bcount); - count = min(tape->buffer_size, bcount); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, + count / tape->blk_size); bcount -= count; - blocks = count / tape->blk_size; - atomic_set(&bh->b_count, count); - memset(bh->b_data, 0, atomic_read(&bh->b_count)); - - idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks, - tape->merge_bh); } } @@ -1267,7 +1134,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, } if (tape->chrdev_dir == IDETAPE_DIR_READ) { - tape->merge_bh_size = 0; + tape->valid = 0; if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) ++count; ide_tape_discard_merge_buffer(drive, 0); @@ -1333,20 +1200,20 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, return rc; if (count == 0) return (0); - if (tape->merge_bh_size) { - actually_read = min((unsigned int)(tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_to_user(tape, buf, actually_read)) + if (tape->valid) { + actually_read = min_t(unsigned int, tape->valid, count); + if (copy_to_user(buf, tape->cur, actually_read)) ret = -EFAULT; buf += actually_read; - tape->merge_bh_size -= actually_read; count -= actually_read; + tape->cur += actually_read; + tape->valid -= actually_read; } while (count >= tape->buffer_size) { bytes_read = idetape_add_chrdev_read_request(drive, ctl); if (bytes_read <= 0) goto finish; - if (idetape_copy_stage_to_user(tape, buf, bytes_read)) + if (copy_to_user(buf, tape->cur, bytes_read)) ret = -EFAULT; buf += bytes_read; count -= bytes_read; @@ -1357,10 +1224,11 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, if (bytes_read <= 0) goto finish; temp = min((unsigned long)count, (unsigned long)bytes_read); - if (idetape_copy_stage_to_user(tape, buf, temp)) + if (copy_to_user(buf, tape->cur, temp)) ret = -EFAULT; actually_read += temp; - tape->merge_bh_size = bytes_read-temp; + tape->cur += temp; + tape->valid -= temp; } finish: if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { @@ -1392,16 +1260,15 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { if (tape->chrdev_dir == IDETAPE_DIR_READ) ide_tape_discard_merge_buffer(drive, 1); - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size " - "should be 0 now\n"); - tape->merge_bh_size = 0; + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); - if (!tape->merge_bh) + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_WRITE; - idetape_init_merge_buffer(tape); + tape->cur = tape->buf; /* * Issue a write 0 command to ensure that DSC handshake is @@ -1411,11 +1278,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, */ if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { ssize_t retval = idetape_queue_rw_tail(drive, - REQ_IDETAPE_WRITE, 0, - tape->merge_bh); + REQ_IDETAPE_WRITE, 0); if (retval < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; tape->chrdev_dir = IDETAPE_DIR_NONE; return retval; } @@ -1423,23 +1289,19 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, } if (count == 0) return (0); - if (tape->merge_bh_size) { - if (tape->merge_bh_size >= tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge buf too big\n"); - tape->merge_bh_size = 0; - } - actually_written = min((unsigned int) - (tape->buffer_size - tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_from_user(tape, buf, actually_written)) - ret = -EFAULT; + if (tape->valid < tape->buffer_size) { + actually_written = min_t(unsigned int, + tape->buffer_size - tape->valid, + count); + if (copy_from_user(tape->cur, buf, actually_written)) + ret = -EFAULT; buf += actually_written; - tape->merge_bh_size += actually_written; count -= actually_written; + tape->cur += actually_written; + tape->valid += actually_written; - if (tape->merge_bh_size == tape->buffer_size) { + if (tape->valid == tape->buffer_size) { ssize_t retval; - tape->merge_bh_size = 0; retval = idetape_add_chrdev_write_request(drive, ctl); if (retval <= 0) return (retval); @@ -1447,7 +1309,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, } while (count >= tape->buffer_size) { ssize_t retval; - if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size)) + if (copy_from_user(tape->cur, buf, tape->buffer_size)) ret = -EFAULT; buf += tape->buffer_size; count -= tape->buffer_size; @@ -1458,9 +1320,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, } if (count) { actually_written += count; - if (idetape_copy_stage_from_user(tape, buf, count)) + if (copy_from_user(tape->cur, buf, count)) ret = -EFAULT; - tape->merge_bh_size += count; + tape->cur += count; + tape->valid += count; } return ret ? ret : actually_written; } @@ -1623,7 +1486,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file, idetape_flush_tape_buffers(drive); } if (cmd == MTIOCGET || cmd == MTIOCPOS) { - block_offset = tape->merge_bh_size / + block_offset = tape->valid / (tape->blk_size * tape->user_bs_factor); position = idetape_read_position(drive); if (position < 0) @@ -1771,12 +1634,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor) idetape_tape_t *tape = drive->driver_data; ide_tape_flush_merge_buffer(drive); - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1); - if (tape->merge_bh != NULL) { + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (tape->buf != NULL) { idetape_pad_zeros(drive, tape->blk_size * (tape->user_bs_factor - 1)); - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; } idetape_write_filemark(drive); idetape_flush_tape_buffers(drive); @@ -2042,7 +1905,7 @@ static void ide_tape_release(struct device *dev) ide_drive_t *drive = tape->drive; struct gendisk *g = tape->disk; - BUG_ON(tape->merge_bh_size); + BUG_ON(tape->valid); drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP; drive->driver_data = NULL; -- cgit v0.10.2 From 88f1b941c5c94016a59144a3c94c9ca31eb16205 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: unify r/w init paths Impact: cleanup Read and write init paths are almost identical. Unify them into idetape_init_rw(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d2e9c09..4da7fa9 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -987,42 +987,50 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) tape->chrdev_dir = IDETAPE_DIR_NONE; } -static int idetape_init_read(ide_drive_t *drive) +static int idetape_init_rw(ide_drive_t *drive, int dir) { idetape_tape_t *tape = drive->driver_data; - int bytes_read; + int rc; - /* Initialize read operation */ - if (tape->chrdev_dir != IDETAPE_DIR_READ) { - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { - ide_tape_flush_merge_buffer(drive); - idetape_flush_tape_buffers(drive); - } - if (tape->buf || tape->valid) { - printk(KERN_ERR "ide-tape: valid should be 0 now\n"); - tape->valid = 0; - } - tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); - if (!tape->buf) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_READ; - tape->cur = tape->buf; + BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE); - /* - * Issue a read 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. - * No point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - bytes_read = idetape_queue_rw_tail(drive, - REQ_IDETAPE_READ, 0); - if (bytes_read < 0) { - kfree(tape->buf); - tape->buf = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return bytes_read; - } + if (tape->chrdev_dir == dir) + return 0; + + if (tape->chrdev_dir == IDETAPE_DIR_READ) + ide_tape_discard_merge_buffer(drive, 1); + else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { + ide_tape_flush_merge_buffer(drive); + idetape_flush_tape_buffers(drive); + } + + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; + } + + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) + return -ENOMEM; + tape->chrdev_dir = dir; + tape->cur = tape->buf; + + /* + * Issue a 0 rw command to ensure that DSC handshake is + * switched from completion mode to buffer available mode. No + * point in issuing this if DSC overlap isn't supported, some + * drives (Seagate STT3401A) will return an error. + */ + if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { + int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ + : REQ_IDETAPE_WRITE; + + rc = idetape_queue_rw_tail(drive, cmd, 0); + if (rc < 0) { + kfree(tape->buf); + tape->buf = NULL; + tape->chrdev_dir = IDETAPE_DIR_NONE; + return rc; } } @@ -1038,7 +1046,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) return 0; - idetape_init_read(drive); + idetape_init_rw(drive, IDETAPE_DIR_READ); return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks); } @@ -1195,7 +1203,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, (count % tape->blk_size) == 0) tape->user_bs_factor = count / tape->blk_size; } - rc = idetape_init_read(drive); + rc = idetape_init_rw(drive, IDETAPE_DIR_READ); if (rc < 0) return rc; if (count == 0) @@ -1249,6 +1257,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, ssize_t actually_written = 0; ssize_t ret = 0; u16 ctl = *(u16 *)&tape->caps[12]; + int rc; /* The drive is write protected. */ if (tape->write_prot) @@ -1257,36 +1266,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); /* Initialize write operation */ - if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { - if (tape->chrdev_dir == IDETAPE_DIR_READ) - ide_tape_discard_merge_buffer(drive, 1); - if (tape->buf || tape->valid) { - printk(KERN_ERR "ide-tape: valid should be 0 now\n"); - tape->valid = 0; - } - tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); - if (!tape->buf) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_WRITE; - tape->cur = tape->buf; - - /* - * Issue a write 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. No - * point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - ssize_t retval = idetape_queue_rw_tail(drive, - REQ_IDETAPE_WRITE, 0); - if (retval < 0) { - kfree(tape->buf); - tape->buf = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return retval; - } - } - } + rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE); + if (rc < 0) + return rc; if (count == 0) return (0); if (tape->valid < tape->buffer_size) { -- cgit v0.10.2 From 6bb11dd14f70228f8dab25fd25dabeb9bc74926d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-tape: use byte size instead of sectors on rw issue functions Impact: cleanup Byte size is what most issue functions deal with, make idetape_queue_rw_tail() and its wrappers take byte size instead of sector counts. idetape_chrdev_read() and write() functions are converted to use tape->buffer_size instead of ctl from tape->cap. This cleans up code a little bit and will ease the next r/w reimplementation. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 4da7fa9..d5e9bb2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -879,15 +879,15 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive, * Generate a read/write request for the block device interface and wait for it * to be serviced. */ -static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks) +static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) { idetape_tape_t *tape = drive->driver_data; - size_t size = blocks * tape->blk_size; struct request *rq; int ret; debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); + BUG_ON(size < 0 || size % tape->blk_size); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; @@ -954,17 +954,16 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) } /* Queue up a character device originated write request. */ -static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) +static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size) { debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size); } static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - int blocks; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer" @@ -972,15 +971,10 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) return; } if (tape->buf) { - blocks = tape->valid / tape->blk_size; - if (tape->valid % tape->blk_size) { - blocks++; - memset(tape->buf + tape->valid, 0, - tape->blk_size - tape->valid % tape->blk_size); - } - (void) idetape_add_chrdev_write_request(drive, blocks); - } - if (tape->buf != NULL) { + size_t aligned = roundup(tape->valid, tape->blk_size); + + memset(tape->cur, 0, aligned - tape->valid); + idetape_add_chrdev_write_request(drive, aligned); kfree(tape->buf); tape->buf = NULL; } @@ -1038,9 +1032,9 @@ static int idetape_init_rw(ide_drive_t *drive, int dir) } /* called from idetape_chrdev_read() to service a chrdev read request. */ -static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) +static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size) { - debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks); + debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size); /* If we are at a filemark, return a read length of 0 */ if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) @@ -1048,7 +1042,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) idetape_init_rw(drive, IDETAPE_DIR_READ); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size); } static void idetape_pad_zeros(ide_drive_t *drive, int bcount) @@ -1060,8 +1054,7 @@ static void idetape_pad_zeros(ide_drive_t *drive, int bcount) while (bcount) { unsigned int count = min(tape->buffer_size, bcount); - idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, - count / tape->blk_size); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count); bcount -= count; } } @@ -1193,7 +1186,6 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, ide_drive_t *drive = tape->drive; ssize_t bytes_read, temp, actually_read = 0, rc; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); @@ -1218,7 +1210,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, tape->valid -= actually_read; } while (count >= tape->buffer_size) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); + bytes_read = idetape_add_chrdev_read_request(drive, + tape->buffer_size); if (bytes_read <= 0) goto finish; if (copy_to_user(buf, tape->cur, bytes_read)) @@ -1228,7 +1221,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, actually_read += bytes_read; } if (count) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); + bytes_read = idetape_add_chrdev_read_request(drive, + tape->buffer_size); if (bytes_read <= 0) goto finish; temp = min((unsigned long)count, (unsigned long)bytes_read); @@ -1256,7 +1250,6 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, ide_drive_t *drive = tape->drive; ssize_t actually_written = 0; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; int rc; /* The drive is write protected. */ @@ -1284,7 +1277,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, if (tape->valid == tape->buffer_size) { ssize_t retval; - retval = idetape_add_chrdev_write_request(drive, ctl); + retval = idetape_add_chrdev_write_request(drive, + tape->buffer_size); if (retval <= 0) return (retval); } @@ -1295,7 +1289,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, ret = -EFAULT; buf += tape->buffer_size; count -= tape->buffer_size; - retval = idetape_add_chrdev_write_request(drive, ctl); + retval = idetape_add_chrdev_write_request(drive, + tape->buffer_size); actually_written += tape->buffer_size; if (retval <= 0) return (retval); -- cgit v0.10.2 From 07bd9686c50c2b1f10e48089d4fb836a971f5177 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-tape: simplify read/write functions Impact: cleanup idetape_chrdev_read/write() functions are unnecessarily complex when everything can be handled in a single loop. Collapse idetape_add_chrdev_read/write_request() into the rw functions and simplify the implementation. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d5e9bb2..2599579 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -953,14 +953,6 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) pc->flags |= PC_FLAG_WAIT_FOR_DSC; } -/* Queue up a character device originated write request. */ -static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size) -{ - debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size); -} - static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; @@ -974,7 +966,7 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) size_t aligned = roundup(tape->valid, tape->blk_size); memset(tape->cur, 0, aligned - tape->valid); - idetape_add_chrdev_write_request(drive, aligned); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned); kfree(tape->buf); tape->buf = NULL; } @@ -1031,20 +1023,6 @@ static int idetape_init_rw(ide_drive_t *drive, int dir) return 0; } -/* called from idetape_chrdev_read() to service a chrdev read request. */ -static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size) -{ - debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size); - - /* If we are at a filemark, return a read length of 0 */ - if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) - return 0; - - idetape_init_rw(drive, IDETAPE_DIR_READ); - - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size); -} - static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; @@ -1184,8 +1162,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t bytes_read, temp, actually_read = 0, rc; + size_t done = 0; ssize_t ret = 0; + int rc; debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); @@ -1195,52 +1174,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, (count % tape->blk_size) == 0) tape->user_bs_factor = count / tape->blk_size; } + rc = idetape_init_rw(drive, IDETAPE_DIR_READ); if (rc < 0) return rc; - if (count == 0) - return (0); - if (tape->valid) { - actually_read = min_t(unsigned int, tape->valid, count); - if (copy_to_user(buf, tape->cur, actually_read)) - ret = -EFAULT; - buf += actually_read; - count -= actually_read; - tape->cur += actually_read; - tape->valid -= actually_read; - } - while (count >= tape->buffer_size) { - bytes_read = idetape_add_chrdev_read_request(drive, - tape->buffer_size); - if (bytes_read <= 0) - goto finish; - if (copy_to_user(buf, tape->cur, bytes_read)) - ret = -EFAULT; - buf += bytes_read; - count -= bytes_read; - actually_read += bytes_read; - } - if (count) { - bytes_read = idetape_add_chrdev_read_request(drive, - tape->buffer_size); - if (bytes_read <= 0) - goto finish; - temp = min((unsigned long)count, (unsigned long)bytes_read); - if (copy_to_user(buf, tape->cur, temp)) + + while (done < count) { + size_t todo; + + /* refill if staging buffer is empty */ + if (!tape->valid) { + /* If we are at a filemark, nothing more to read */ + if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) + break; + /* read */ + if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, + tape->buffer_size) <= 0) + break; + } + + /* copy out */ + todo = min_t(size_t, count - done, tape->valid); + if (copy_to_user(buf + done, tape->cur, todo)) ret = -EFAULT; - actually_read += temp; - tape->cur += temp; - tape->valid -= temp; + + tape->cur += todo; + tape->valid -= todo; + done += todo; } -finish: - if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { + + if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name); idetape_space_over_filemarks(drive, MTFSF, 1); return 0; } - return ret ? ret : actually_read; + return ret ? ret : done; } static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, @@ -1248,7 +1218,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t actually_written = 0; + size_t done = 0; ssize_t ret = 0; int rc; @@ -1262,47 +1232,28 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE); if (rc < 0) return rc; - if (count == 0) - return (0); - if (tape->valid < tape->buffer_size) { - actually_written = min_t(unsigned int, - tape->buffer_size - tape->valid, - count); - if (copy_from_user(tape->cur, buf, actually_written)) - ret = -EFAULT; - buf += actually_written; - count -= actually_written; - tape->cur += actually_written; - tape->valid += actually_written; - - if (tape->valid == tape->buffer_size) { - ssize_t retval; - retval = idetape_add_chrdev_write_request(drive, - tape->buffer_size); - if (retval <= 0) - return (retval); - } - } - while (count >= tape->buffer_size) { - ssize_t retval; - if (copy_from_user(tape->cur, buf, tape->buffer_size)) - ret = -EFAULT; - buf += tape->buffer_size; - count -= tape->buffer_size; - retval = idetape_add_chrdev_write_request(drive, - tape->buffer_size); - actually_written += tape->buffer_size; - if (retval <= 0) - return (retval); - } - if (count) { - actually_written += count; - if (copy_from_user(tape->cur, buf, count)) + + while (done < count) { + size_t todo; + + /* flush if staging buffer is full */ + if (tape->valid == tape->buffer_size && + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, + tape->buffer_size) <= 0) + return rc; + + /* copy in */ + todo = min_t(size_t, count - done, + tape->buffer_size - tape->valid); + if (copy_from_user(tape->cur, buf + done, todo)) ret = -EFAULT; - tape->cur += count; - tape->valid += count; + + tape->cur += todo; + tape->valid += todo; + done += todo; } - return ret ? ret : actually_written; + + return ret ? ret : done; } static int idetape_write_filemark(ide_drive_t *drive) -- cgit v0.10.2 From 6d7003877c2f0578f1c08f66d05c3f72ef4ae596 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-atapi: kill unused fields and callbacks Impact: remove fields and code paths which are no longer necessary Now that ide-tape uses standard mechanisms to transfer data, special case handling for bh handling can be dropped from ide-atapi. Drop the followings. * pc->cur_pos, b_count, bh and b_data * drive->pc_update_buffers() and pc_io_buffers(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index b9dd450..afe5a43 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->name, rq_data_dir(pc->rq) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; - } else { + } else pc->xferred = pc->req_xfer; - if (drive->pc_update_buffers) - drive->pc_update_buffers(drive, pc); - } debug_log("%s: DMA finished\n", drive->name); } @@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - if (drive->media == ide_tape && pc->bh) - done = drive->pc_io_buffers(drive, pc, bcount, write); - else { - done = min_t(unsigned int, bcount, cmd->nleft); - ide_pio_bytes(drive, cmd, write, done); - } + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); - /* Update the current position */ + /* Update transferred byte count */ pc->xferred += done; - pc->cur_pos += done; bcount -= done; @@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) /* We haven't transferred any data yet */ pc->xferred = 0; - pc->cur_pos = pc->buf; valid_tf = IDE_VALID_DEVICE; bcount = ((drive->media == ide_tape) ? diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2599579..8dfc688 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = NULL; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; diff --git a/include/linux/ide.h b/include/linux/ide.h index 1957461..34c128f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -362,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -379,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -595,10 +587,6 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; -- cgit v0.10.2 From 2c316bb57ad4e9f0f3de2d7ef1ae85530c2a7e69 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide: drop rq->data handling from ide_map_sg() Impact: remove code path which is no longer necessary All IDE data transfers now use rq->bio. Simplify ide_map_sg() accordingly. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 6e3094e..a0309ea 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -248,11 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) struct scatterlist *sg = hwif->sg_table; struct request *rq = cmd->rq; - if (!rq->bio) { - sg_init_one(sg, rq->data, rq->data_len); - cmd->sg_nents = 1; - } else - cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); + cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); } EXPORT_SYMBOL_GPL(ide_map_sg); -- cgit v0.10.2 From e475eedb09ee9a0fd855f3e923aa9af31c17d141 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 15 Apr 2009 10:50:04 +0000 Subject: clocksource: sh_cmt earlytimer support Add Early Platform Driver support to the sh_cmt driver using the earlytimer class. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 1c92c39..02bae39 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -566,9 +566,19 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) static int __devinit sh_cmt_probe(struct platform_device *pdev) { struct sh_cmt_priv *p = platform_get_drvdata(pdev); + struct sh_cmt_config *cfg = pdev->dev.platform_data; int ret; - p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p) { + pr_info("sh_cmt: %s kept as earlytimer\n", cfg->name); + return 0; + } + + if (is_early_platform_device(pdev)) + p = alloc_bootmem(sizeof(*p)); + else + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) { dev_err(&pdev->dev, "failed to allocate driver data\n"); return -ENOMEM; @@ -576,7 +586,10 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) ret = sh_cmt_setup(p, pdev); if (ret) { - kfree(p); + if (is_early_platform_device(pdev)) + free_bootmem(__pa(p), sizeof(*p)); + else + kfree(p); platform_set_drvdata(pdev, NULL); } @@ -606,6 +619,7 @@ static void __exit sh_cmt_exit(void) platform_driver_unregister(&sh_cmt_device_driver); } +early_platform_init("earlytimer", &sh_cmt_device_driver); module_init(sh_cmt_init); module_exit(sh_cmt_exit); -- cgit v0.10.2 From eaab89197b733d0f81f07d6c44294b674479fda8 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 15 Apr 2009 10:50:12 +0000 Subject: sh: arch earlytimer support Extend the 32-bit SuperH timer code to register and probe the earlytimer class of Early Platform Drivers. This registers the sh_cmt driver if compiled-in. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c index c34e1e0..04b8c6a 100644 --- a/arch/sh/kernel/time_32.c +++ b/arch/sh/kernel/time_32.c @@ -17,6 +17,7 @@ #include #include #include /* for rtc_lock */ +#include #include #include #include @@ -229,6 +230,14 @@ void __init time_init(void) #endif /* + * Make sure all compiled-in early timers register themselves. + * Run probe() for one "earlytimer" device. + */ + early_platform_driver_register_all("earlytimer"); + if (early_platform_driver_probe("earlytimer", 1, 0)) + return; + + /* * Find the timer to use as the system timer, it will be * initialized for us. */ -- cgit v0.10.2 From 87a00dc059e3af46303f1f56b0e8df41af988c7b Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 15 Apr 2009 10:50:21 +0000 Subject: sh: Add plat_early_device_setup() Add a plat_early_device_setup() function to allow processor-specific code to register Early Platform Data. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/include/asm/device.h b/arch/sh/include/asm/device.h index efd511d..8688a88 100644 --- a/arch/sh/include/asm/device.h +++ b/arch/sh/include/asm/device.h @@ -10,3 +10,5 @@ struct platform_device; int platform_resource_setup_memory(struct platform_device *pdev, char *name, unsigned long memsize); +void plat_early_device_setup(void); + diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 04a6004..22b976d 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -328,6 +329,10 @@ static int __init parse_elfcorehdr(char *arg) early_param("elfcorehdr", parse_elfcorehdr); #endif +void __init __attribute__ ((weak)) plat_early_device_setup(void) +{ +} + void __init setup_arch(char **cmdline_p) { enable_mmu(); @@ -381,6 +386,8 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); + plat_early_device_setup(); + sh_mv_setup(); /* -- cgit v0.10.2 From 28fde6863e6ed1f4bfb7cef080e67bf439c20628 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 15 Apr 2009 10:50:30 +0000 Subject: sh: Early Platform Data for SuperH Mobile Use plat_early_device_setup() to register Early Platform Data for SuperH Mobile processors. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c index c154938..cb5b4db 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c @@ -234,6 +234,16 @@ static int __init sh7343_devices_setup(void) } __initcall(sh7343_devices_setup); +static struct platform_device *sh7343_early_devices[] __initdata = { + &cmt_device, +}; + +void __init plat_early_device_setup(void) +{ + early_platform_add_devices(sh7343_early_devices, + ARRAY_SIZE(sh7343_early_devices)); +} + enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index 93ecf8e..2a771f4 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -226,6 +226,16 @@ static int __init sh7366_devices_setup(void) } __initcall(sh7366_devices_setup); +static struct platform_device *sh7366_early_devices[] __initdata = { + &cmt_device, +}; + +void __init plat_early_device_setup(void) +{ + early_platform_add_devices(sh7366_early_devices, + ARRAY_SIZE(sh7366_early_devices)); +} + enum { UNUSED=0, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index 0e5d204..8f974d0 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -270,6 +270,16 @@ static int __init sh7722_devices_setup(void) } __initcall(sh7722_devices_setup); +static struct platform_device *sh7722_early_devices[] __initdata = { + &cmt_device, +}; + +void __init plat_early_device_setup(void) +{ + early_platform_add_devices(sh7722_early_devices, + ARRAY_SIZE(sh7722_early_devices)); +} + enum { UNUSED=0, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index 5338dac..21a58d6 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -281,6 +281,16 @@ static int __init sh7723_devices_setup(void) } __initcall(sh7723_devices_setup); +static struct platform_device *sh7723_early_devices[] __initdata = { + &cmt_device, +}; + +void __init plat_early_device_setup(void) +{ + early_platform_add_devices(sh7723_early_devices, + ARRAY_SIZE(sh7723_early_devices)); +} + enum { UNUSED=0, -- cgit v0.10.2 From d9aed8b95f062b2f96e37b0e07373f36a6e7066d Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sun, 19 Apr 2009 13:12:04 +0900 Subject: sh: sh7724: Don't default enable the RTC clock. rtc-sh takes care of this now, so no need to have this always enabled. Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 8b87ba8..195274a 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -275,7 +275,6 @@ static struct platform_device *sh7724_devices[] __initdata = { static int __init sh7724_devices_setup(void) { - clk_always_enable("rtc0"); /* RTC */ clk_always_enable("vpu0"); /* VPU */ clk_always_enable("veu1"); /* VEU3F1 */ clk_always_enable("veu0"); /* VEU3F0 */ -- cgit v0.10.2 From 8fb2bae4b41eb64f6e233e9bd3f3a789fbb04a06 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Sun, 19 Apr 2009 13:14:29 +0900 Subject: sh: sh7724: Register CMT as an early platform device here too. Follows the SH7722/SH7723 changes. Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index 195274a..8429396 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -288,6 +288,16 @@ static int __init sh7724_devices_setup(void) } device_initcall(sh7724_devices_setup); +static struct platform_device *sh7724_early_devices[] __initdata = { + &cmt_device, +}; + +void __init plat_early_device_setup(void) +{ + early_platform_add_devices(sh7724_early_devices, + ARRAY_SIZE(sh7724_early_devices)); +} + enum { UNUSED = 0, -- cgit v0.10.2 From 2b2fd87a6ef56ba7647a578e81bb8c8efda166b8 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:12 +0800 Subject: docs, x86: add nox2apic back to kernel-parameters.txt "nox2apic" was removed from kernel-parameters.txt by mistake, when entries were sorted in alpha order (commit 0cb55ad2). But this early parameter is still there, add it back to kernel-parameters.txt. [ Impact: add boot parameter description ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: Randy Dunlap Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-2-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6172e43..33989d2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1588,6 +1588,8 @@ and is between 256 and 4096 characters. It is defined in the file nowb [ARM] + nox2apic [X86-64,APIC] Do not enable x2APIC mode. + nptcg= [IA64] Override max number of concurrent global TLB purges which is reported from either PAL_VM_SUMMARY or SAL PALO. -- cgit v0.10.2 From 5d0ae2db6deac4f15dac4f42f23bc56448fc8d4d Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:13 +0800 Subject: x86, intr-remap: fix ack for interrupt remapping Shouldn't call ack_apic_edge() in ir_ack_apic_edge(), because ack_apic_edge() does more than just ack: it also does irq migration in the non-interrupt-remapping case. But there is no such need for interrupt-remapping case, as irq migration is done in the process context. Similarly, ir_ack_apic_level() shouldn't call ack_apic_level, and instead should do the local cpu's EOI + directed EOI to the io-apic. ack_x2APIC_irq() is not neccessary, because ack_APIC_irq() will use MSR write for x2apic, and uncached write for non-x2apic. [ Impact: simplify/standardize intr-remap IRQ acking, fix on !x2apic ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-3-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2bd5a46..d4cb7e5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -204,14 +204,6 @@ static inline int x2apic_enabled(void) extern int get_physical_broadcast(void); -#ifdef CONFIG_X86_X2APIC -static inline void ack_x2APIC_irq(void) -{ - /* Docs say use 0 for future compatibility */ - native_apic_msr_write(APIC_EOI, 0); -} -#endif - extern void apic_disable(void); extern int lapic_get_maxlvt(void); extern void clear_local_APIC(void); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8499000..ea22a86 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2552,20 +2552,6 @@ eoi_ioapic_irq(struct irq_desc *desc) spin_unlock_irqrestore(&ioapic_lock, flags); } -#ifdef CONFIG_X86_X2APIC -static void ack_x2apic_level(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - ack_x2APIC_irq(); - eoi_ioapic_irq(desc); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif - static void ack_apic_edge(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2629,9 +2615,6 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); - if (irq_remapped(irq)) - eoi_ioapic_irq(desc); - /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2680,20 +2663,15 @@ static void ack_apic_level(unsigned int irq) #ifdef CONFIG_INTR_REMAP static void ir_ack_apic_edge(unsigned int irq) { -#ifdef CONFIG_X86_X2APIC - if (x2apic_enabled()) - return ack_x2apic_edge(irq); -#endif - return ack_apic_edge(irq); + ack_APIC_irq(); } static void ir_ack_apic_level(unsigned int irq) { -#ifdef CONFIG_X86_X2APIC - if (x2apic_enabled()) - return ack_x2apic_level(irq); -#endif - return ack_apic_level(irq); + struct irq_desc *desc = irq_to_desc(irq); + + ack_APIC_irq(); + eoi_ioapic_irq(desc); } #endif /* CONFIG_INTR_REMAP */ -- cgit v0.10.2 From 937582382c71b75b29fbb92615629494e1a05ac0 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:14 +0800 Subject: x86, intr-remap: enable interrupt remapping early Currently, when x2apic is not enabled, interrupt remapping will be enabled in init_dmars(), where it is too late to remap ioapic interrupts, that is, ioapic interrupts are really in compatibility mode, not remappable mode. This patch always enables interrupt remapping before ioapic setup, it guarantees all interrupts will be remapped when interrupt remapping is enabled. Thus it doesn't need to set the compatibility interrupt bit. [ Impact: refactor intr-remap init sequence, enable fuller remap mode ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-4-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index d4cb7e5..fbdd654 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -169,7 +169,6 @@ static inline u64 native_x2apic_icr_read(void) extern int x2apic, x2apic_phys; extern void check_x2apic(void); extern void enable_x2apic(void); -extern void enable_IR_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); static inline int x2apic_enabled(void) { @@ -190,18 +189,18 @@ static inline void check_x2apic(void) static inline void enable_x2apic(void) { } -static inline void enable_IR_x2apic(void) -{ -} static inline int x2apic_enabled(void) { return 0; } #define x2apic 0 +#define x2apic_preenabled 0 #endif +extern void enable_IR_x2apic(void); + extern int get_physical_broadcast(void); extern void apic_disable(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 83e47fe..0cf1eea 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -141,6 +141,8 @@ static int x2apic_preenabled; static int disable_x2apic; static __init int setup_nox2apic(char *str) { + if (x2apic_enabled()) + panic("Bios already enabled x2apic, can't enforce nox2apic"); disable_x2apic = 1; setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; @@ -1345,6 +1347,7 @@ void enable_x2apic(void) wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); } } +#endif /* CONFIG_X86_X2APIC */ void __init enable_IR_x2apic(void) { @@ -1353,32 +1356,21 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; - if (!cpu_has_x2apic) - return; - - if (!x2apic_preenabled && disable_x2apic) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of nox2apic\n"); - return; + ret = dmar_table_init(); + if (ret) { + pr_debug("dmar_table_init() failed with %d:\n", ret); + goto ir_failed; } - if (x2apic_preenabled && disable_x2apic) - panic("Bios already enabled x2apic, can't enforce nox2apic"); - - if (!x2apic_preenabled && skip_ioapic_setup) { - pr_info("Skipped enabling x2apic and Interrupt-remapping " - "because of skipping io-apic setup\n"); - return; + if (!intr_remapping_supported()) { + pr_debug("intr-remapping not supported\n"); + goto ir_failed; } - ret = dmar_table_init(); - if (ret) { - pr_info("dmar_table_init() failed with %d:\n", ret); - if (x2apic_preenabled) - panic("x2apic enabled by bios. But IR enabling failed"); - else - pr_info("Not enabling x2apic,Intr-remapping\n"); + if (!x2apic_preenabled && skip_ioapic_setup) { + pr_info("Skipped enabling intr-remap because of skipping " + "io-apic setup\n"); return; } @@ -1398,20 +1390,25 @@ void __init enable_IR_x2apic(void) mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - ret = enable_intr_remapping(EIM_32BIT_APIC_ID); - - if (ret && x2apic_preenabled) { - local_irq_restore(flags); - panic("x2apic enabled by bios. But IR enabling failed"); - } +#ifdef CONFIG_X86_X2APIC + if (cpu_has_x2apic) + ret = enable_intr_remapping(EIM_32BIT_APIC_ID); + else +#endif + ret = enable_intr_remapping(EIM_8BIT_APIC_ID); if (ret) goto end_restore; - if (!x2apic) { + pr_info("Enabled Interrupt-remapping\n"); + +#ifdef CONFIG_X86_X2APIC + if (cpu_has_x2apic && !x2apic) { x2apic = 1; enable_x2apic(); + pr_info("Enabled x2apic\n"); } +#endif end_restore: if (ret) @@ -1426,30 +1423,29 @@ end_restore: local_irq_restore(flags); end: - if (!ret) { - if (!x2apic_preenabled) - pr_info("Enabled x2apic and interrupt-remapping\n"); - else - pr_info("Enabled Interrupt-remapping\n"); - } else - pr_err("Failed to enable Interrupt-remapping and x2apic\n"); if (ioapic_entries) free_ioapic_entries(ioapic_entries); + + if (!ret) + return; + +ir_failed: + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else if (cpu_has_x2apic) + pr_info("Not enabling x2apic,Intr-remapping\n"); #else if (!cpu_has_x2apic) return; if (x2apic_preenabled) panic("x2apic enabled prior OS handover," - " enable CONFIG_INTR_REMAP"); - - pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " - " and x2apic\n"); + " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); #endif return; } -#endif /* CONFIG_X86_X2APIC */ + #ifdef CONFIG_X86_64 /* diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 001b328..9ce8f07 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1968,15 +1968,6 @@ static int __init init_dmars(void) } } -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) { - ret = enable_intr_remapping(0); - if (ret) - printk(KERN_ERR - "IOMMU: enable interrupt remapping failed\n"); - } -#endif - /* * For each rmrr * for each dev attached to rmrr diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea7..5c21426 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -423,20 +423,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) readl, (sts & DMA_GSTS_IRTPS), sts); spin_unlock_irqrestore(&iommu->register_lock, flags); - if (mode == 0) { - spin_lock_irqsave(&iommu->register_lock, flags); - - /* enable comaptiblity format interrupt pass through */ - cmd = iommu->gcmd | DMA_GCMD_CFI; - iommu->gcmd |= DMA_GCMD_CFI; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, - readl, (sts & DMA_GSTS_CFIS), sts); - - spin_unlock_irqrestore(&iommu->register_lock, flags); - } - /* * global invalidation of interrupt entry cache before enabling * interrupt-remapping. @@ -516,6 +502,20 @@ end: spin_unlock_irqrestore(&iommu->register_lock, flags); } +int __init intr_remapping_supported(void) +{ + struct dmar_drhd_unit *drhd; + + for_each_drhd_unit(drhd) { + struct intel_iommu *iommu = drhd->iommu; + + if (!ecap_ir_support(iommu->ecap)) + return 0; + } + + return 1; +} + int __init enable_intr_remapping(int eim) { struct dmar_drhd_unit *drhd; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e397dc3..06f592a 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -108,6 +108,7 @@ struct irte { }; #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; +extern int intr_remapping_supported(void); extern int enable_intr_remapping(int); extern void disable_intr_remapping(void); extern int reenable_intr_remapping(int); -- cgit v0.10.2 From 03ea81550676296d94596e4337c771c6ba29f542 Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:15 +0800 Subject: x86, intr-remap: add option to disable interrupt remapping Add option "nointremap" to disable interrupt remapping. [ Impact: add new boot option ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-5-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 33989d2..843cb66 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1533,6 +1533,9 @@ and is between 256 and 4096 characters. It is defined in the file noinitrd [RAM] Tells the kernel not to load any configured initial RAM disk. + nointremap [X86-64, Intel-IOMMU] Do not enable interrupt + remapping. + nointroute [IA-64] nojitter [IA64] Disables jitter checking for ITC timers. diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 5c21426..842039e 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -15,6 +15,14 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; int intr_remapping_enabled; +static int disable_intremap; +static __init int setup_nointremap(char *str) +{ + disable_intremap = 1; + return 0; +} +early_param("nointremap", setup_nointremap); + struct irq_2_iommu { struct intel_iommu *iommu; u16 irte_index; @@ -506,6 +514,9 @@ int __init intr_remapping_supported(void) { struct dmar_drhd_unit *drhd; + if (disable_intremap) + return 0; + for_each_drhd_unit(drhd) { struct intel_iommu *iommu = drhd->iommu; -- cgit v0.10.2 From 9a2755c3569e4db92bd9b1daadeddb4045b0cccd Mon Sep 17 00:00:00 2001 From: Weidong Han Date: Fri, 17 Apr 2009 16:42:16 +0800 Subject: x86, intr-remap: fix x2apic/intr-remap resume Interrupt remapping was decoupled from x2apic. Shouldn't check x2apic before resume interrupt remapping. Otherwise, interrupt remapping won't be resumed when x2apic is not enabled. [ Impact: fix potential intr-remap resume hang on !x2apic ] Signed-off-by: Suresh Siddha Signed-off-by: Weidong Han Acked-by: David Woodhouse Cc: iommu@lists.linux-foundation.org Cc: allen.m.kay@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1239957736-6161-6-git-send-email-weidong.han@intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0cf1eea..7b41a32 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2032,7 +2032,7 @@ static int lapic_resume(struct sys_device *dev) return 0; local_irq_save(flags); - if (x2apic) { + if (intr_remapping_enabled) { ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { WARN(1, "Alloc ioapic_entries in lapic resume failed."); @@ -2048,8 +2048,10 @@ static int lapic_resume(struct sys_device *dev) mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - enable_x2apic(); } + + if (x2apic) + enable_x2apic(); #else if (!apic_pm_state.active) return 0; @@ -2097,10 +2099,12 @@ static int lapic_resume(struct sys_device *dev) apic_read(APIC_ESR); #ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - reenable_intr_remapping(EIM_32BIT_APIC_ID); + if (intr_remapping_enabled) { + if (x2apic) + reenable_intr_remapping(EIM_32BIT_APIC_ID); + else + reenable_intr_remapping(EIM_8BIT_APIC_ID); - if (x2apic) { unmask_8259A(); restore_IO_APIC_setup(ioapic_entries); free_ioapic_entries(ioapic_entries); @@ -2109,7 +2113,6 @@ static int lapic_resume(struct sys_device *dev) local_irq_restore(flags); - return 0; } -- cgit v0.10.2 From cece3155d869a50ba534ed161b5a05e8a29dcad0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 18 Apr 2009 23:45:28 +0400 Subject: x86: smpboot - wakeup_secondary should be done via __cpuinit section A caller (do_boot_cpu) already has __cpuinit attribute. Since HOTPLUG_CPU depends on SMP && HOTPLUG it doesn't lead to panic at moment. [ Impact: cleanup ] Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090418194528.GD25510@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bf8ad63..d2e8de9 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid) * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ -int __devinit +int __cpuinit wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; @@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) return (send_status | accept_status); } -static int __devinit +static int __cpuinit wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; -- cgit v0.10.2 From 667c5296cc76fefe0abcb79228952b28d9af45e3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 19 Apr 2009 11:43:11 +0400 Subject: x86: es7000, uv - use __cpuinit for kicking secondary cpus The caller already has __cpuinit attribute. [ Impact: save memory, address section mismatch warning ] Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Pavel Emelyanov LKML-Reference: <20090419074311.GA8670@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 1c11b81..8e07c14 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } -static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) +static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) { unsigned long vect = 0, psaival = 0; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index de1a50a..873bf71 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -91,7 +91,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) cpumask_set_cpu(cpu, retmask); } -static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) +static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) { #ifdef CONFIG_SMP unsigned long val; -- cgit v0.10.2 From e057a5e5647a1c9d0d0054fbd298bfa04b3d1cb4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 23:38:12 +0200 Subject: tracing/core: Add current context on tracing recursion warning In case of tracing recursion detection, we only get the stacktrace. But the current context may be very useful to debug the issue. This patch adds the softirq/hardirq/nmi context with the warning using lockdep context display to have a familiar output. v2: Use printk_once() v3: drop {hardirq,softirq}_context which depend on lockdep, only keep what is part of current->trace_recursion, sufficient to debug the warning source. [ Impact: print context necessary to debug recursion ] Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b421b0e..bffde63 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1495,6 +1495,13 @@ static int trace_recursive_lock(void) if (unlikely(current->trace_recursion & (1 << level))) { /* Disable all tracing before we do anything else */ tracing_off_permanent(); + + printk_once(KERN_WARNING "Tracing recursion: " + "HC[%lu]:SC[%lu]:NMI[%lu]\n", + hardirq_count() >> HARDIRQ_SHIFT, + softirq_count() >> SOFTIRQ_SHIFT, + in_nmi()); + WARN_ON_ONCE(1); return -1; } -- cgit v0.10.2 From 9ade1217c9ba39ad2f004a898ddfbb815fd5fe74 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 15:38:25 +0900 Subject: sh: pci: Drop asm-generic/pci.h, so we can use our own fixups. The new PCI code wants its own bus<->resource mappings instead of the generic equivalents, so drop the asm-generic include in preparation. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 8aaacc9..6d659cd 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -72,6 +72,20 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + region->start = res->start; + region->end = res->end; +} + +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + res->start = region->start; + res->end = region->end; +} + void pcibios_align_resource(void *data, struct resource *res, resource_size_t size, resource_size_t align) __attribute__ ((weak)); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index bb2c2fc..69cb615 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -146,13 +146,34 @@ int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin); int pciauto_assign_resources(int busno, struct pci_channel *hose); #endif -#endif /* __KERNEL__ */ +extern void pcibios_resource_to_bus(struct pci_dev *dev, + struct pci_bus_region *region, struct resource *res); + +extern void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + +static inline struct resource * +pcibios_select_root(struct pci_dev *pdev, struct resource *res) +{ + struct resource *root = NULL; + + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; -/* generic pci stuff */ -#include + return root; +} + +/* Chances are this interrupt is wired PC-style ... */ +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + return channel ? 15 : 14; +} /* generic DMA-mapping stuff */ #include +#endif /* __KERNEL__ */ #endif /* __ASM_SH_PCI_H */ -- cgit v0.10.2 From 4c5107e44514a9bde74d0af77982705d602d9e39 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 15:43:36 +0900 Subject: sh: pci: Split out new-style PCI core. This splits off a 'pci-new.c' which is aimed at gradually replacing the pci-auto backend and the arch/sh/drivers/pci/pci.c core respectively. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig index 7e816ed..efe8cf9 100644 --- a/arch/sh/drivers/pci/Kconfig +++ b/arch/sh/drivers/pci/Kconfig @@ -18,10 +18,17 @@ config SH_PCIDMA_NONCOHERENT bridge integrated with your SH CPU, refer carefully to the chip specs to see if you can say 'N' here. Otherwise, leave it as 'Y'. +# Temporary config option for transitioning off of PCI_AUTO +config PCI_NEW + bool + depends on PCI + default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \ + CPU_SUBTYPE_SH7785 + # This is also board-specific config PCI_AUTO bool - depends on PCI + depends on PCI && !PCI_NEW default y config PCI_AUTO_UPDATE_RESOURCES @@ -34,4 +41,3 @@ config PCI_AUTO_UPDATE_RESOURCES for some reason, you have a board that simply refuses to work with its resources updated beyond what they are when the device is powered up, set this to N. Everyone else will want this as Y. - diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index 5003971..362a1ee 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -2,8 +2,8 @@ # Makefile for the PCI specific kernel interface routines under Linux. # -obj-y += pci.o -obj-$(CONFIG_PCI_AUTO) += pci-auto.o +obj-$(CONFIG_PCI_AUTO) := pci.o pci-auto.o +obj-$(CONFIG_PCI_NEW) := pci-new.o obj-$(CONFIG_CPU_SUBTYPE_SH7751) += pci-sh7751.o ops-sh4.o obj-$(CONFIG_CPU_SUBTYPE_SH7751R) += pci-sh7751.o ops-sh4.o diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c new file mode 100644 index 0000000..097eb88 --- /dev/null +++ b/arch/sh/drivers/pci/pci-new.c @@ -0,0 +1,248 @@ +/* + * New-style PCI core. + * + * Copyright (c) 2002 M. R. Brown + * Copyright (c) 2004 - 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include + +static int __init pcibios_init(void) +{ + struct pci_channel *p; + struct pci_bus *bus; + int busno; + + /* init channels */ + busno = 0; + for (p = board_pci_channels; p->init; p++) { + if (p->init(p) == 0) + p->enabled = 1; + else + pr_err("Unable to init pci channel %d\n", busno); + busno++; + } + + /* scan the buses */ + busno = 0; + for (p = board_pci_channels; p->init; p++) { + if (p->enabled) { + bus = pci_scan_bus(busno, p->pci_ops, p); + busno = bus->subordinate + 1; + + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); + } + } + + pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); + + dma_debug_add_bus(&pci_bus_type); + + return 0; +} +subsys_initcall(pcibios_init); + +static void pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus) +{ + /* Update device resources. */ + struct pci_channel *chan = bus->sysdata; + unsigned long offset = 0; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!dev->resource[i].start) + continue; + if (dev->resource[i].flags & IORESOURCE_PCI_FIXED) + continue; + if (dev->resource[i].flags & IORESOURCE_IO) + offset = chan->io_base; + else if (dev->resource[i].flags & IORESOURCE_MEM) + offset = 0; + + dev->resource[i].start += offset; + dev->resource[i].end += offset; + } +} + + +/* + * Called after each bus is probed, but before its children + * are examined. + */ +void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + struct list_head *ln; + struct pci_channel *chan = bus->sysdata; + + if (!dev) { + bus->resource[0] = chan->io_resource; + bus->resource[1] = chan->mem_resource; + } + + for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) { + dev = pci_dev_b(ln); + + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); + } +} + +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_channel *chan = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = chan->io_base; + else if (res->flags & IORESOURCE_MEM) + offset = 0; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void __devinit +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_channel *chan = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = chan->io_base; + else if (res->flags & IORESOURCE_MEM) + offset = 0; + + res->start = region->start + offset; + res->end = region->end + offset; +} + +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) + __attribute__ ((weak)); + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + */ +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) +{ + if (res->flags & IORESOURCE_IO) { + resource_size_t start = res->start; + + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for(idx=0; idx<6; idx++) { + if (!(mask & (1 << idx))) + continue; + r = &dev->resource[idx]; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available because " + "of resource collisions\n", pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (dev->resource[PCI_ROM_RESOURCE].start) + cmd |= PCI_COMMAND_MEMORY; + if (cmd != old_cmd) { + printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n", + pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check and set + * the latency timer as it may not be properly set. + */ +static unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", + pci_name(dev), lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (unlikely(!len || !start)) + return NULL; + if (maxlen && len > maxlen) + len = maxlen; + + /* + * Presently the IORESOURCE_MEM case is a bit special, most + * SH7751 style PCI controllers have PCI memory at a fixed + * location in the address space where no remapping is desired. + * With the IORESOURCE_MEM case more care has to be taken + * to inhibit page table mapping for legacy cores, but this is + * punted off to __ioremap(). + * -- PFM. + */ + if (flags & IORESOURCE_IO) + return ioport_map(start, len); + if (flags & IORESOURCE_MEM) + return ioremap(start, len); + + return NULL; +} +EXPORT_SYMBOL(pci_iomap); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); + +EXPORT_SYMBOL(board_pci_channels); -- cgit v0.10.2 From 9833385131fc4e8c52f95320ab899051d1c06831 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 15:51:45 +0900 Subject: sh: pci: HAVE_PCI_MMAP support. Derived from the MIPS version, now uses pgprot_noncached(). Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index 362a1ee..c8eab14 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -1,9 +1,9 @@ # # Makefile for the PCI specific kernel interface routines under Linux. # - -obj-$(CONFIG_PCI_AUTO) := pci.o pci-auto.o -obj-$(CONFIG_PCI_NEW) := pci-new.o +obj-y += pci-lib.o +obj-$(CONFIG_PCI_AUTO) += pci.o pci-auto.o +obj-$(CONFIG_PCI_NEW) += pci-new.o obj-$(CONFIG_CPU_SUBTYPE_SH7751) += pci-sh7751.o ops-sh4.o obj-$(CONFIG_CPU_SUBTYPE_SH7751R) += pci-sh7751.o ops-sh4.o diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c new file mode 100644 index 0000000..1a43a35 --- /dev/null +++ b/arch/sh/drivers/pci/pci-lib.c @@ -0,0 +1,26 @@ +#include +#include +#include +#include +#include + +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + /* + * I/O space can be accessed via normal processor loads and stores on + * this platform but for now we elect not to do this and portable + * drivers should not do this anyway. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + /* + * Ignore write-combine; for now only return uncached mappings. + */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 69cb615..46afd44 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -40,6 +40,9 @@ extern struct pci_channel board_pci_channels[]; struct pci_dev; +#define HAVE_PCI_MMAP +extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); extern void pcibios_set_master(struct pci_dev *dev); static inline void pcibios_penalize_isa_irq(int irq, int active) -- cgit v0.10.2 From a3c0e0d0032d5bbfd7dc04827a257c717d432a5b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 16:14:29 +0900 Subject: sh: pci: Consolidate pcibios_align_resource() definitions. This introduces a saner pcibios_align_resource() that can be used regardless of whether pci-auto or pci-new are being used, and consolidates it in pci-lib.c. Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-se/7751/pci.c b/arch/sh/boards/mach-se/7751/pci.c index 203b292..9ec64a4 100644 --- a/arch/sh/boards/mach-se/7751/pci.c +++ b/arch/sh/boards/mach-se/7751/pci.c @@ -30,6 +30,9 @@ #define PCIC_WRITE(x,v) writel((v), PCI_REG(x)) #define PCIC_READ(x) readl(PCI_REG(x)) +#define xPCIBIOS_MIN_IO board_pci_channels->io_resource->start +#define xPCIBIOS_MIN_MEM board_pci_channels->mem_resource->start + /* * Description: This function sets up and initializes the pcic, sets * up the BARS, maps the DRAM into the address space etc, etc. @@ -97,12 +100,12 @@ int __init pcibios_init_platform(void) * meaning all calls go straight through... use BUG_ON to * catch erroneous assumption. */ - BUG_ON(PCIBIOS_MIN_MEM != SH7751_PCI_MEMORY_BASE); + BUG_ON(xPCIBIOS_MIN_MEM != SH7751_PCI_MEMORY_BASE); - PCIC_WRITE(SH7751_PCIMBR, PCIBIOS_MIN_MEM); + PCIC_WRITE(SH7751_PCIMBR, xPCIBIOS_MIN_MEM); /* Set IOBR for window containing area specified in pci.h */ - PCIC_WRITE(SH7751_PCIIOBR, (PCIBIOS_MIN_IO & SH7751_PCIIOBR_MASK)); + PCIC_WRITE(SH7751_PCIIOBR, (xPCIBIOS_MIN_IO & SH7751_PCIIOBR_MASK)); /* All done, may as well say so... */ printk("SH7751 PCI: Finished initialization of the PCI controller\n"); diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c index 1a43a35..8ab1a2d 100644 --- a/arch/sh/drivers/pci/pci-lib.c +++ b/arch/sh/drivers/pci/pci-lib.c @@ -4,6 +4,41 @@ #include #include +unsigned long PCIBIOS_MIN_IO = 0x0000; +unsigned long PCIBIOS_MIN_MEM = 0; + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + */ +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) +{ + struct pci_dev *dev = data; + struct pci_channel *chan = dev->sysdata; + resource_size_t start = res->start; + + if (res->flags & IORESOURCE_IO) { + if (start < PCIBIOS_MIN_IO + chan->io_resource->start) + start = PCIBIOS_MIN_IO + chan->io_resource->start; + + /* + * Put everything into 0x00-0xff region modulo 0x400. + */ + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } else if (res->flags & IORESOURCE_MEM) { + if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start) + start = PCIBIOS_MIN_MEM + chan->mem_resource->start; + } + + res->start = start; +} + int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { @@ -24,3 +59,10 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, vma->vm_end - vma->vm_start, vma->vm_page_prot); } + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); +EXPORT_SYMBOL(PCIBIOS_MIN_IO); +EXPORT_SYMBOL(PCIBIOS_MIN_MEM); +#endif diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c index 097eb88..4e9251f 100644 --- a/arch/sh/drivers/pci/pci-new.c +++ b/arch/sh/drivers/pci/pci-new.c @@ -129,29 +129,6 @@ pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, res->end = region->end + offset; } -void pcibios_align_resource(void *data, struct resource *res, - resource_size_t size, resource_size_t align) - __attribute__ ((weak)); - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - */ -void pcibios_align_resource(void *data, struct resource *res, - resource_size_t size, resource_size_t align) -{ - if (res->flags & IORESOURCE_IO) { - resource_size_t start = res->start; - - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 6d659cd..f670988 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -86,29 +86,6 @@ void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, res->end = region->end; } -void pcibios_align_resource(void *data, struct resource *res, - resource_size_t size, resource_size_t align) - __attribute__ ((weak)); - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - */ -void pcibios_align_resource(void *data, struct resource *res, - resource_size_t size, resource_size_t align) -{ - if (res->flags & IORESOURCE_IO) { - resource_size_t start = res->start; - - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 46afd44..5212bf6 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -33,10 +33,7 @@ struct pci_channel { */ extern struct pci_channel board_pci_channels[]; -/* ugly as hell, but makes drivers/pci/setup-res.c compile and work */ -#define __PCI_CHAN(bus) ((struct pci_channel *)bus->sysdata) -#define PCIBIOS_MIN_IO __PCI_CHAN(bus)->io_resource->start -#define PCIBIOS_MIN_MEM __PCI_CHAN(bus)->mem_resource->start +extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM; struct pci_dev; -- cgit v0.10.2 From 394b6d2fe624246e258a218dac68d44fe9a8411f Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 16:18:46 +0900 Subject: sh: pci: Kill off unused pcibios_fixup(). This is left over cruft that hasn't been used by anything in a long time, kill off bits that weren't purged previously. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c index dd2c5df..b64f2b9 100644 --- a/arch/sh/drivers/pci/ops-snapgear.c +++ b/arch/sh/drivers/pci/ops-snapgear.c @@ -85,8 +85,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) return irq; } - -void __init pcibios_fixup(void) -{ - /* Nothing to fixup .. */ -} diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 5212bf6..e8265fd 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -138,7 +138,6 @@ static inline void __iomem *__get_pci_io_base(unsigned long port, #endif /* Board-specific fixup routines. */ -void pcibios_fixup(void); int pcibios_init_platform(void); int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin); -- cgit v0.10.2 From 0bb34a6bf1f71d5ad2abfda582a2c2794957bc7b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 16:38:00 +0900 Subject: sh: pci: Consolidate pci_iomap() and use the generic I/O base. This consolidates the pci_iomap() definitions and reworks how the I/O port base is handled. PCI channels can register their own I/O map base, or if none is provided, the system-wide generic I/O base is used instead. Functionally nothing changes, while this allows us to kill off lots of I/O address special casing and lookups. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c index 8ab1a2d..654ffcc 100644 --- a/arch/sh/drivers/pci/pci-lib.c +++ b/arch/sh/drivers/pci/pci-lib.c @@ -60,6 +60,57 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, vma->vm_page_prot); } +static void __iomem *ioport_map_pci(struct pci_dev *dev, + unsigned long port, unsigned int nr) +{ + struct pci_channel *chan = dev->sysdata; + + if (!chan->io_map_base) + chan->io_map_base = generic_io_base; + + return (void __iomem *)(chan->io_map_base + port); +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (unlikely(!len || !start)) + return NULL; + if (maxlen && len > maxlen) + len = maxlen; + + if (flags & IORESOURCE_IO) + return ioport_map_pci(dev, start, len); + + /* + * Presently the IORESOURCE_MEM case is a bit special, most + * SH7751 style PCI controllers have PCI memory at a fixed + * location in the address space where no remapping is desired. + * With the IORESOURCE_MEM case more care has to be taken + * to inhibit page table mapping for legacy cores, but this is + * punted off to __ioremap(). + * -- PFM. + */ + if (flags & IORESOURCE_MEM) { + if (flags & IORESOURCE_CACHEABLE) + return ioremap(start, len); + + return ioremap_nocache(start, len); + } + + return NULL; +} +EXPORT_SYMBOL(pci_iomap); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); + #ifdef CONFIG_HOTPLUG EXPORT_SYMBOL(pcibios_resource_to_bus); EXPORT_SYMBOL(pcibios_bus_to_resource); diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c index 4e9251f..c92e650 100644 --- a/arch/sh/drivers/pci/pci-new.c +++ b/arch/sh/drivers/pci/pci-new.c @@ -187,39 +187,4 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (unlikely(!len || !start)) - return NULL; - if (maxlen && len > maxlen) - len = maxlen; - - /* - * Presently the IORESOURCE_MEM case is a bit special, most - * SH7751 style PCI controllers have PCI memory at a fixed - * location in the address space where no remapping is desired. - * With the IORESOURCE_MEM case more care has to be taken - * to inhibit page table mapping for legacy cores, but this is - * punted off to __ioremap(). - * -- PFM. - */ - if (flags & IORESOURCE_IO) - return ioport_map(start, len); - if (flags & IORESOURCE_MEM) - return ioremap(start, len); - - return NULL; -} -EXPORT_SYMBOL(pci_iomap); - -void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ - iounmap(addr); -} -EXPORT_SYMBOL(pci_iounmap); - EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index f670988..d39f240 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -144,39 +144,4 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (unlikely(!len || !start)) - return NULL; - if (maxlen && len > maxlen) - len = maxlen; - - /* - * Presently the IORESOURCE_MEM case is a bit special, most - * SH7751 style PCI controllers have PCI memory at a fixed - * location in the address space where no remapping is desired. - * With the IORESOURCE_MEM case more care has to be taken - * to inhibit page table mapping for legacy cores, but this is - * punted off to __ioremap(). - * -- PFM. - */ - if (flags & IORESOURCE_IO) - return ioport_map(start, len); - if (flags & IORESOURCE_MEM) - return ioremap(start, len); - - return NULL; -} -EXPORT_SYMBOL(pci_iomap); - -void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ - iounmap(addr); -} -EXPORT_SYMBOL(pci_iounmap); - EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index e8265fd..5324282 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -26,6 +26,8 @@ struct pci_channel { int enabled; unsigned long reg_base; unsigned long io_base; + + unsigned long io_map_base; }; /* @@ -110,31 +112,11 @@ static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) } return 0; } - -static inline void __iomem *__get_pci_io_base(unsigned long port, - unsigned long size) -{ - struct pci_channel *p; - struct resource *res; - - for (p = board_pci_channels; p->init; p++) { - res = p->io_resource; - if (p->enabled && (port >= res->start) && - (port + size) <= (res->end + 1)) - return (void __iomem *)(p->io_base + port); - } - return NULL; -} #else static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) { return 0; } -static inline void __iomem *__get_pci_io_base(unsigned long port, - unsigned long size) -{ - return NULL; -} #endif /* Board-specific fixup routines. */ diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c index 59fb020..4f85fff 100644 --- a/arch/sh/kernel/io.c +++ b/arch/sh/kernel/io.c @@ -70,10 +70,6 @@ void __iomem *ioport_map(unsigned long port, unsigned int nr) if (ret) return ret; - ret = __get_pci_io_base(port, nr); - if (ret) - return ret; - return __ioport_map(port, nr); } EXPORT_SYMBOL(ioport_map); -- cgit v0.10.2 From 67667263674663767ddf4250bab2437a00ee780e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Apr 2009 10:49:25 +0200 Subject: ALSA: hda - Fix channels_max setting for CA0110 Added the missing definition of max channels for CA0110, which resulted in an error at opening PCM devices. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c index 7ec41da..9398d92 100644 --- a/sound/pci/hda/patch_ca0110.c +++ b/sound/pci/hda/patch_ca0110.c @@ -309,7 +309,7 @@ static int ca0110_build_pcms(struct hda_codec *codec) info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ca0110_pcm_analog_playback; info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->dacs[0]; info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max = - spec->multiout.num_dacs * 2; + spec->multiout.max_channels; info->stream[SNDRV_PCM_STREAM_CAPTURE] = ca0110_pcm_analog_capture; info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = spec->num_inputs; info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[0]; @@ -418,6 +418,7 @@ static void parse_line_outs(struct hda_codec *codec) } spec->multiout.dac_nids = spec->dacs; spec->multiout.num_dacs = n; + spec->multiout.max_channels = n * 2; } static void parse_hp_out(struct hda_codec *codec) -- cgit v0.10.2 From 7670dc41b51983b369f9adfb8694a580e7b0cef2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Apr 2009 10:51:11 +0200 Subject: ALSA: hda - Use snd_hda_codec_get_pincfg() in patch_ca0110.c Use the new function to reduce the access and allow the user setup via sysfs, too. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_ca0110.c b/sound/pci/hda/patch_ca0110.c index 9398d92..392d108 100644 --- a/sound/pci/hda/patch_ca0110.c +++ b/sound/pci/hda/patch_ca0110.c @@ -408,8 +408,7 @@ static void parse_line_outs(struct hda_codec *codec) n = 0; for (i = 0; i < cfg->line_outs; i++) { nid = cfg->line_out_pins[i]; - def_conf = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_CONFIG_DEFAULT, 0); + def_conf = snd_hda_codec_get_pincfg(codec, nid); if (!def_conf) continue; /* invalid pin */ if (snd_hda_get_connections(codec, nid, &spec->dacs[i], 1) != 1) @@ -432,8 +431,7 @@ static void parse_hp_out(struct hda_codec *codec) if (!cfg->hp_outs) return; nid = cfg->hp_pins[0]; - def_conf = snd_hda_codec_read(codec, nid, 0, - AC_VERB_GET_CONFIG_DEFAULT, 0); + def_conf = snd_hda_codec_get_pincfg(codec, nid); if (!def_conf) { cfg->hp_outs = 0; return; -- cgit v0.10.2 From f3b9aae16219aaeca2dd5a9ca69f7a10faa063df Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 23:39:33 +0200 Subject: tracing/ring-buffer: Add unlock recursion protection on discard The pair of helpers trace_recursive_lock() and trace_recursive_unlock() have been introduced recently to provide generic tracing recursion protection. They are used in a symetric way: - trace_recursive_lock() on buffer reserve - trace_recursive_unlock() on buffer commit However sometimes, we don't commit but discard on entry to the buffer, ie: in case of filter checking. Then we must also unlock the recursion protection on discard time, otherwise the tracing gets definitely deactivated and a warning is raised spuriously, such as: 111.119821] ------------[ cut here ]------------ [ 111.119829] WARNING: at kernel/trace/ring_buffer.c:1498 ring_buffer_lock_reserve+0x1b7/0x1d0() [ 111.119835] Hardware name: AMILO Li 2727 [ 111.119839] Modules linked in: [ 111.119846] Pid: 5731, comm: Xorg Tainted: G W 2.6.30-rc1 #69 [ 111.119851] Call Trace: [ 111.119863] [] warn_slowpath+0xd8/0x130 [ 111.119873] [] ? __lock_acquire+0x19f/0x1ae0 [ 111.119882] [] ? __lock_acquire+0x19f/0x1ae0 [ 111.119891] [] ? native_sched_clock+0x20/0x70 [ 111.119899] [] ? put_lock_stats+0xe/0x30 [ 111.119906] [] ? lock_release_holdtime+0xa8/0x150 [ 111.119913] [] ring_buffer_lock_reserve+0x1b7/0x1d0 [ 111.119921] [] trace_buffer_lock_reserve+0x30/0x70 [ 111.119930] [] trace_current_buffer_lock_reserve+0x20/0x30 [ 111.119939] [] ftrace_raw_event_sched_switch+0x58/0x100 [ 111.119948] [] __schedule+0x3a7/0x4cd [ 111.119957] [] ? ftrace_call+0x5/0x2b [ 111.119964] [] ? ftrace_call+0x5/0x2b [ 111.119971] [] schedule+0x18/0x40 [ 111.119977] [] preempt_schedule+0x39/0x60 [ 111.119985] [] _read_unlock+0x53/0x60 [ 111.119993] [] sock_def_readable+0x72/0x80 [ 111.120002] [] unix_stream_sendmsg+0x24d/0x3d0 [ 111.120011] [] sock_aio_write+0x143/0x160 [ 111.120019] [] ? ftrace_call+0x5/0x2b [ 111.120026] [] ? sock_aio_write+0x0/0x160 [ 111.120033] [] ? sock_aio_write+0x0/0x160 [ 111.120042] [] do_sync_readv_writev+0xf3/0x140 [ 111.120049] [] ? ftrace_call+0x5/0x2b [ 111.120057] [] ? autoremove_wake_function+0x0/0x40 [ 111.120067] [] ? cap_file_permission+0x9/0x10 [ 111.120074] [] ? security_file_permission+0x16/0x20 [ 111.120082] [] do_readv_writev+0xd4/0x1f0 [ 111.120089] [] ? ftrace_call+0x5/0x2b [ 111.120097] [] ? ftrace_call+0x5/0x2b [ 111.120105] [] vfs_writev+0x48/0x70 [ 111.120111] [] sys_writev+0x55/0xc0 [ 111.120119] [] system_call_fastpath+0x16/0x1b [ 111.120125] ---[ end trace 15605f4e98d5ccb5 ]--- [ Impact: fix spurious warning triggering tracing shutdown ] Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bffde63..e145969 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1642,6 +1642,14 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); +static inline void rb_event_discard(struct ring_buffer_event *event) +{ + event->type = RINGBUF_TYPE_PADDING; + /* time delta must be non zero */ + if (!event->time_delta) + event->time_delta = 1; +} + /** * ring_buffer_event_discard - discard any event in the ring buffer * @event: the event to discard @@ -1656,10 +1664,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); */ void ring_buffer_event_discard(struct ring_buffer_event *event) { - event->type = RINGBUF_TYPE_PADDING; - /* time delta must be non zero */ - if (!event->time_delta) - event->time_delta = 1; + rb_event_discard(event); + trace_recursive_unlock(); } EXPORT_SYMBOL_GPL(ring_buffer_event_discard); @@ -1690,7 +1696,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, int cpu; /* The event is discarded regardless */ - ring_buffer_event_discard(event); + rb_event_discard(event); /* * This must only be called if the event has not been @@ -1735,6 +1741,8 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer, if (rb_is_commit(cpu_buffer, event)) rb_set_commit_to_write(cpu_buffer); + trace_recursive_unlock(); + /* * Only the last preempt count needs to restore preemption. */ -- cgit v0.10.2 From 99f95f117848088f2708b45c70be73152e78bb8a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 18:24:57 +0900 Subject: sh: pci: Rework fixed region checks in ioremap(). Not all PCI channels have non-translatable memory windows, this is a special property of the on-chip PCIC with its 0xfd00... mapping, handle this explicitly. Signed-off-by: Paul Mundt diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 5324282..82a9369 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -90,7 +90,6 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) #define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif -#ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, unsigned long *strategy_parameter) @@ -99,24 +98,18 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, *strategy_parameter = ~0UL; } -static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) -{ - struct pci_channel *p; - struct resource *res; - - for (p = board_pci_channels; p->init; p++) { - res = p->mem_resource; - if (p->enabled && (phys_addr >= res->start) && - (phys_addr + size) <= (res->end + 1)) - return 1; - } - return 0; -} +#ifdef CONFIG_SUPERH32 +/* + * If we're on an SH7751 or SH7780 PCI controller, PCI memory is mapped + * at the end of the address space in a special non-translatable area. + */ +#define PCI_MEM_FIXED_START 0xfd000000 +#define PCI_MEM_FIXED_END (PCI_MEM_FIXED_START + 0x01000000) + +#define is_pci_memory_fixed_range(s, e) \ + ((s) >= PCI_MEM_FIXED_START && (e) < PCI_MEM_FIXED_END) #else -static inline int __is_pci_memory(unsigned long phys_addr, unsigned long size) -{ - return 0; -} +#define is_pci_memory_fixed_range(s, e) (0) #endif /* Board-specific fixup routines. */ diff --git a/arch/sh/mm/ioremap_32.c b/arch/sh/mm/ioremap_32.c index 7e04cc8..da2f418 100644 --- a/arch/sh/mm/ioremap_32.c +++ b/arch/sh/mm/ioremap_32.c @@ -46,17 +46,15 @@ void __iomem *__ioremap(unsigned long phys_addr, unsigned long size, return NULL; /* - * If we're on an SH7751 or SH7780 PCI controller, PCI memory is - * mapped at the end of the address space (typically 0xfd000000) - * in a non-translatable area, so mapping through page tables for - * this area is not only pointless, but also fundamentally - * broken. Just return the physical address instead. + * If we're in the fixed PCI memory range, mapping through page + * tables is not only pointless, but also fundamentally broken. + * Just return the physical address instead. * * For boards that map a small PCI memory aperture somewhere in * P1/P2 space, ioremap() will already do the right thing, * and we'll never get this far. */ - if (__is_pci_memory(phys_addr, size)) + if (is_pci_memory_fixed_range(phys_addr, size)) return (void __iomem *)phys_addr; #if !defined(CONFIG_PMB_FIXED) @@ -121,7 +119,9 @@ void __iounmap(void __iomem *addr) unsigned long seg = PXSEG(vaddr); struct vm_struct *p; - if (seg < P3SEG || vaddr >= P3_ADDR_MAX || __is_pci_memory(vaddr, 0)) + if (seg < P3SEG || vaddr >= P3_ADDR_MAX) + return; + if (is_pci_memory_fixed_range(vaddr, 0)) return; #ifdef CONFIG_PMB -- cgit v0.10.2 From e79066a659b893debe19010179d3f3f015d76d1c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 18:29:22 +0900 Subject: sh: pci: New-style controller registration. This moves off of the board_pci_channels[] approach for bus registration and over to a cleaner register_pci_controller(), all derived from the MIPS code. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c index c92e650..78b7292 100644 --- a/arch/sh/drivers/pci/pci-new.c +++ b/arch/sh/drivers/pci/pci-new.c @@ -13,40 +13,90 @@ #include #include #include +#include -static int __init pcibios_init(void) +/* + * The PCI controller list. + */ +static struct pci_channel *hose_head, **hose_tail = &hose_head; + +static int pci_initialized; + +static void __devinit pcibios_scanbus(struct pci_channel *hose) { - struct pci_channel *p; + static int next_busno; struct pci_bus *bus; - int busno; - - /* init channels */ - busno = 0; - for (p = board_pci_channels; p->init; p++) { - if (p->init(p) == 0) - p->enabled = 1; - else - pr_err("Unable to init pci channel %d\n", busno); - busno++; + + /* Catch botched conversion attempts */ + BUG_ON(hose->init); + + bus = pci_scan_bus(next_busno, hose->pci_ops, hose); + if (bus) { + next_busno = bus->subordinate + 1; + /* Don't allow 8-bit bus number overflow inside the hose - + reserve some space for bridges. */ + if (next_busno > 224) + next_busno = 0; + + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); } +} - /* scan the buses */ - busno = 0; - for (p = board_pci_channels; p->init; p++) { - if (p->enabled) { - bus = pci_scan_bus(busno, p->pci_ops, p); - busno = bus->subordinate + 1; +static DEFINE_MUTEX(pci_scan_mutex); - pci_bus_size_bridges(bus); - pci_bus_assign_resources(bus); - pci_enable_bridges(bus); - } +void __devinit register_pci_controller(struct pci_channel *hose) +{ + if (request_resource(&iomem_resource, hose->mem_resource) < 0) + goto out; + if (request_resource(&ioport_resource, hose->io_resource) < 0) { + release_resource(hose->mem_resource); + goto out; + } + + *hose_tail = hose; + hose_tail = &hose->next; + + /* + * Do not panic here but later - this might hapen before console init. + */ + if (!hose->io_map_base) { + printk(KERN_WARNING + "registering PCI controller with io_map_base unset\n"); + } + + /* + * Scan the bus if it is register after the PCI subsystem + * initialization. + */ + if (pci_initialized) { + mutex_lock(&pci_scan_mutex); + pcibios_scanbus(hose); + mutex_unlock(&pci_scan_mutex); } + return; + +out: + printk(KERN_WARNING + "Skipping PCI bus scan due to resource conflict\n"); +} + +static int __init pcibios_init(void) +{ + struct pci_channel *hose; + + /* Scan all of the recorded PCI controllers. */ + for (hose = hose_head; hose; hose = hose->next) + pcibios_scanbus(hose); + pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); dma_debug_add_bus(&pci_bus_type); + pci_initialized = 1; + return 0; } subsys_initcall(pcibios_init); @@ -74,7 +124,6 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev, } } - /* * Called after each bus is probed, but before its children * are examined. @@ -186,5 +235,3 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) { pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } - -EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index f02d9df..4dd6e3b 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -15,11 +15,47 @@ #include #include "pci-sh4.h" -static int __init sh7780_pci_init(struct pci_channel *chan) +extern u8 pci_cache_line_size; + +static struct resource sh7785_io_resource = { + .name = "SH7785_IO", + .start = SH7780_PCI_IO_BASE, + .end = SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1, + .flags = IORESOURCE_IO +}; + +static struct resource sh7785_mem_resource = { + .name = "SH7785_mem", + .start = SH7780_PCI_MEMORY_BASE, + .end = SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1, + .flags = IORESOURCE_MEM +}; + +static struct pci_channel sh7780_pci_controller = { + .pci_ops = &sh4_pci_ops, + .mem_resource = &sh7785_mem_resource, + .io_resource = &sh7785_io_resource, +}; + +static struct sh4_pci_address_map sh7780_pci_map = { + .window0 = { +#if defined(CONFIG_32BIT) + .base = SH7780_32BIT_DDR_BASE_ADDR, + .size = 0x40000000, +#else + .base = SH7780_CS0_BASE_ADDR, + .size = 0x20000000, +#endif + }, +}; + +static int __init sh7780_pci_init(void) { + struct pci_channel *chan = &sh7780_pci_controller; unsigned int id; const char *type = NULL; int ret; + u32 word; printk(KERN_NOTICE "PCI: Starting intialization.\n"); @@ -55,52 +91,6 @@ static int __init sh7780_pci_init(struct pci_channel *chan) return ret; /* - * Platform specific initialization (BSC registers, and memory space - * mapping) will be called via the platform defined function - * pcibios_init_platform(). - */ - return pcibios_init_platform(); -} - -extern u8 pci_cache_line_size; - -static struct resource sh7785_io_resource = { - .name = "SH7785_IO", - .start = SH7780_PCI_IO_BASE, - .end = SH7780_PCI_IO_BASE + SH7780_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7785_mem_resource = { - .name = "SH7785_mem", - .start = SH7780_PCI_MEMORY_BASE, - .end = SH7780_PCI_MEMORY_BASE + SH7780_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -struct pci_channel board_pci_channels[] = { - { sh7780_pci_init, &sh4_pci_ops, &sh7785_io_resource, &sh7785_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - -static struct sh4_pci_address_map sh7780_pci_map = { - .window0 = { -#if defined(CONFIG_32BIT) - .base = SH7780_32BIT_DDR_BASE_ADDR, - .size = 0x40000000, -#else - .base = SH7780_CS0_BASE_ADDR, - .size = 0x20000000, -#endif - }, -}; - -int __init pcibios_init_platform(void) -{ - struct pci_channel *chan = &board_pci_channels[0]; - u32 word; - - /* * Set the class and sub-class codes. */ __raw_writeb(PCI_CLASS_BRIDGE_HOST >> 8, @@ -153,5 +143,8 @@ int __init pcibios_init_platform(void) __set_io_port_base(SH7780_PCI_IO_BASE); + register_pci_controller(chan); + return 0; } +arch_initcall(sh7780_pci_init); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 82a9369..e057ebd 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -17,17 +17,22 @@ * external) PCI controllers. */ struct pci_channel { - int (*init)(struct pci_channel *chan); - struct pci_ops *pci_ops; - struct resource *io_resource; - struct resource *mem_resource; - int first_devfn; - int last_devfn; - int enabled; - unsigned long reg_base; - unsigned long io_base; - - unsigned long io_map_base; + struct pci_channel *next; + + int (*init)(struct pci_channel *chan); + + struct pci_ops *pci_ops; + struct resource *io_resource; + struct resource *mem_resource; + + int first_devfn; + int last_devfn; + int enabled; + + unsigned long reg_base; + unsigned long io_base; + + unsigned long io_map_base; }; /* @@ -35,6 +40,8 @@ struct pci_channel { */ extern struct pci_channel board_pci_channels[]; +extern void register_pci_controller(struct pci_channel *hose); + extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM; struct pci_dev; -- cgit v0.10.2 From 09cfeb133e3cac39b8b9a2cb1d8ab4f77e396248 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 18:42:00 +0900 Subject: sh: pci: Track io and mem_offset per-channel. This implements a per-hose offset for I/O and mem resources. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c index 78b7292..e8ac8da 100644 --- a/arch/sh/drivers/pci/pci-new.c +++ b/arch/sh/drivers/pci/pci-new.c @@ -105,7 +105,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev, struct pci_bus *bus) { /* Update device resources. */ - struct pci_channel *chan = bus->sysdata; + struct pci_channel *hose = bus->sysdata; unsigned long offset = 0; int i; @@ -115,9 +115,9 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev, if (dev->resource[i].flags & IORESOURCE_PCI_FIXED) continue; if (dev->resource[i].flags & IORESOURCE_IO) - offset = chan->io_base; + offset = hose->io_offset; else if (dev->resource[i].flags & IORESOURCE_MEM) - offset = 0; + offset = hose->mem_offset; dev->resource[i].start += offset; dev->resource[i].end += offset; @@ -150,13 +150,13 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, struct resource *res) { - struct pci_channel *chan = dev->sysdata; + struct pci_channel *hose = dev->sysdata; unsigned long offset = 0; if (res->flags & IORESOURCE_IO) - offset = chan->io_base; + offset = hose->io_offset; else if (res->flags & IORESOURCE_MEM) - offset = 0; + offset = hose->mem_offset; region->start = res->start - offset; region->end = res->end - offset; @@ -166,13 +166,13 @@ void __devinit pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, struct pci_bus_region *region) { - struct pci_channel *chan = dev->sysdata; + struct pci_channel *hose = dev->sysdata; unsigned long offset = 0; if (res->flags & IORESOURCE_IO) - offset = chan->io_base; + offset = hose->io_offset; else if (res->flags & IORESOURCE_MEM) - offset = 0; + offset = hose->mem_offset; res->start = region->start + offset; res->end = region->end + offset; diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 4dd6e3b..57a3b87 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -34,7 +34,9 @@ static struct resource sh7785_mem_resource = { static struct pci_channel sh7780_pci_controller = { .pci_ops = &sh4_pci_ops, .mem_resource = &sh7785_mem_resource, + .mem_offset = 0x00000000, .io_resource = &sh7785_io_resource, + .io_offset = 0x00000000, }; static struct sh4_pci_address_map sh7780_pci_map = { diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index e057ebd..0be2052 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -25,6 +25,9 @@ struct pci_channel { struct resource *io_resource; struct resource *mem_resource; + unsigned long io_offset; + unsigned long mem_offset; + int first_devfn; int last_devfn; int enabled; -- cgit v0.10.2 From 5160d3f782a5e0cdb3bdaa8a891a1fb9d9ab83ec Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 18:47:21 +0900 Subject: sh: pci: Consolidate bus<->resource mapping in pci-lib. Now that the io and mem offsets are tracked accordingly, the pci-new version of the bus<->resource mappers can be used generically. This moves them in to pci-lib. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c index 654ffcc..9fd3af9 100644 --- a/arch/sh/drivers/pci/pci-lib.c +++ b/arch/sh/drivers/pci/pci-lib.c @@ -39,6 +39,37 @@ void pcibios_align_resource(void *data, struct resource *res, res->start = start; } +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void __devinit +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} + int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c index e8ac8da..9d42614 100644 --- a/arch/sh/drivers/pci/pci-new.c +++ b/arch/sh/drivers/pci/pci-new.c @@ -147,37 +147,6 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) } } -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, - struct resource *res) -{ - struct pci_channel *hose = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (res->flags & IORESOURCE_MEM) - offset = hose->mem_offset; - - region->start = res->start - offset; - region->end = res->end - offset; -} - -void __devinit -pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, - struct pci_bus_region *region) -{ - struct pci_channel *hose = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (res->flags & IORESOURCE_MEM) - offset = hose->mem_offset; - - res->start = region->start + offset; - res->end = region->end + offset; -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index d39f240..c15a6f0 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -72,20 +72,6 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, - struct resource *res) -{ - region->start = res->start; - region->end = res->end; -} - -void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, - struct pci_bus_region *region) -{ - res->start = region->start; - res->end = region->end; -} - int pcibios_enable_device(struct pci_dev *dev, int mask) { u16 cmd, old_cmd; -- cgit v0.10.2 From 3f8daeacd7ed7a502daf0998e2515cea4f467f21 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 18:53:41 +0900 Subject: sh: pci: Consolidate the remaining common bits. This moves the remaining common bits in to pci-lib. Thereby reducing pci.c/pci-new.c to simple bus fixups and controller registration. As more platforms are moved over, the old code will disappear completely and the pci-new bits will be rolled in to pci-lib, eventually replacing pci.c completely. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c index 9fd3af9..ea83629 100644 --- a/arch/sh/drivers/pci/pci-lib.c +++ b/arch/sh/drivers/pci/pci-lib.c @@ -70,6 +70,70 @@ pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, res->end = region->end + offset; } +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx=0; idx < PCI_NUM_RESOURCES; idx++) { + /* Only set up the requested stuff */ + if (!(mask & (1<resource[idx]; + if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if ((idx == PCI_ROM_RESOURCE) && + (!(r->flags & IORESOURCE_ROM_ENABLE))) + continue; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", + pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check and set + * the latency timer as it may not be properly set. + */ +static unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", + pci_name(dev), lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c index 9d42614..8c0b136 100644 --- a/arch/sh/drivers/pci/pci-new.c +++ b/arch/sh/drivers/pci/pci-new.c @@ -146,61 +146,3 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) pcibios_fixup_device_resources(dev, bus); } } - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for(idx=0; idx<6; idx++) { - if (!(mask & (1 << idx))) - continue; - r = &dev->resource[idx]; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because " - "of resource collisions\n", pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; - if (cmd != old_cmd) { - printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check and set - * the latency timer as it may not be properly set. - */ -static unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index c15a6f0..8c332b2 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -72,62 +72,4 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) pci_read_bridge_bases(bus); } -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for(idx=0; idx<6; idx++) { - if (!(mask & (1 << idx))) - continue; - r = &dev->resource[idx]; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because " - "of resource collisions\n", pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; - if (cmd != old_cmd) { - printk(KERN_INFO "PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check and set - * the latency timer as it may not be properly set. - */ -static unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - EXPORT_SYMBOL(board_pci_channels); -- cgit v0.10.2 From 5ba7205fc49ff72e88784c94fb661f93e7ae7d36 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 19:00:32 +0900 Subject: sh: pci: Kill off the now unused hose->io_base. Nothing is using this any more, so kill it off. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index af88744..4c08fd7 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -40,7 +40,6 @@ int __init sh7751_pci_init(struct pci_channel *chan) pr_debug("PCI: Starting intialization.\n"); chan->reg_base = 0xfe200000; - chan->io_base = 0xfe240000; /* check for SH7751/SH7751R hardware */ id = pci_read_reg(chan, SH7751_PCICONF0); @@ -136,13 +135,6 @@ int __init sh7751_pcic_init(struct pci_channel *chan, pr_debug("PCI: Setting upper bits of Memory window to 0x%x\n", word); pci_write_reg(chan, word , SH4_PCIMBR); - /* Map IO space into PCI IO window: - * IO addresses will be translated to the PCI IO window base address - */ - pr_debug("PCI: Mapping IO address 0x%x - 0x%x to base 0x%lx\n", - chan->io_resource->start, chan->io_resource->end, - chan->io_base + chan->io_resource->start); - /* Make sure the MSB's of IO window are set to access PCI space * correctly */ word = chan->io_resource->start & SH4_PCIIOBR_MASK; diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 57a3b87..ae13ff9 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -62,7 +62,6 @@ static int __init sh7780_pci_init(void) printk(KERN_NOTICE "PCI: Starting intialization.\n"); chan->reg_base = 0xfe040000; - chan->io_base = 0xfe200000; /* Enable CPU access to the PCIC registers. */ __raw_writel(PCIECR_ENBL, PCIECR); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 0be2052..f36c789 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -33,7 +33,6 @@ struct pci_channel { int enabled; unsigned long reg_base; - unsigned long io_base; unsigned long io_map_base; }; -- cgit v0.10.2 From bb3396477bf46c7cae6bd04b969580277957966e Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 19:26:45 +0900 Subject: sh: pci: Kill off superfluous lboxre2 pci fixups. This is a verbatim copy of the r2d one, use that instead. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index c8eab14..4cac866 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -22,5 +22,5 @@ obj-$(CONFIG_SH_SDK7780) += fixups-sdk7780.o obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o obj-$(CONFIG_SH_TITAN) += ops-titan.o obj-$(CONFIG_SH_LANDISK) += ops-landisk.o -obj-$(CONFIG_SH_LBOX_RE2) += ops-lboxre2.o fixups-lboxre2.o +obj-$(CONFIG_SH_LBOX_RE2) += ops-lboxre2.o fixups-rts7751r2d.o obj-$(CONFIG_SH_CAYMAN) += ops-cayman.o diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c deleted file mode 100644 index a82011d..0000000 --- a/arch/sh/drivers/pci/fixups-lboxre2.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * arch/sh/drivers/pci/fixups-lboxre2.c - * - * L-BOX RE2 PCI fixups - * - * Copyright (C) 2007 Nobuhiro Iwamatsu - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include "pci-sh4.h" - -#define PCIMCR_MRSET_OFF 0xBFFFFFFF -#define PCIMCR_RFSH_OFF 0xFFFFFFFB - -int pci_fixup_pcic(struct pci_channel *chan) -{ - unsigned long bcr1, mcr; - - bcr1 = ctrl_inl(SH7751_BCR1); - bcr1 |= 0x40080000; /* Enable Bit 19 BREQEN, set PCIC to slave */ - pci_write_reg(chan, bcr1, SH4_PCIBCR1); - - /* Enable all interrupts, so we known what to fix */ - pci_write_reg(chan, 0x0000c3ff, SH4_PCIINTM); - pci_write_reg(chan, 0x0000380f, SH4_PCIAINTM); - pci_write_reg(chan, 0xfb900047, SH7751_PCICONF1); - pci_write_reg(chan, 0xab000001, SH7751_PCICONF4); - - mcr = ctrl_inl(SH7751_MCR); - mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF; - pci_write_reg(chan, mcr, SH4_PCIMCR); - - pci_write_reg(chan, 0x0c000000, SH7751_PCICONF5); - pci_write_reg(chan, 0xd0000000, SH7751_PCICONF6); - pci_write_reg(chan, 0x0c000000, SH4_PCILAR0); - pci_write_reg(chan, 0x00000000, SH4_PCILAR1); - - return 0; -} -- cgit v0.10.2 From d556fcc101c4b0d57ac9742ab806a6bfed78eac1 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 19:31:20 +0900 Subject: sh: pci: Flag the dreamcast BBA as IORESOURCE_PCI_FIXED. This isn't a real BAR, so prevent any attempts to move it, as we don't wish to encourage a bus luck by overzealous PCI initialization code. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c index 2bf85cf..48c6381 100644 --- a/arch/sh/drivers/pci/fixups-dreamcast.c +++ b/arch/sh/drivers/pci/fixups-dreamcast.c @@ -41,6 +41,13 @@ static void __init gapspci_fixup_resources(struct pci_dev *dev) */ dev->resource[1].start = p->io_resource->start + 0x100; dev->resource[1].end = dev->resource[1].start + 0x200 - 1; + + /* + * This is not a normal BAR, prevent any attempts to move + * the BAR, as this will result in a bus lock. + */ + dev->resource[1].flags |= IORESOURCE_PCI_FIXED; + /* * Redirect dma memory allocations to special memory window. */ -- cgit v0.10.2 From c563bf0965c86cc6087b09c34ca063fe96d6deca Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 19:39:57 +0900 Subject: sh: pci: Kill off dead references to is_pci_ioaddr and friends. Some old boards are still using this in their I/O routines, kill it off. Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-se/7751/io.c b/arch/sh/boards/mach-se/7751/io.c index 6287ae5..6e75bd4 100644 --- a/arch/sh/boards/mach-se/7751/io.c +++ b/arch/sh/boards/mach-se/7751/io.c @@ -34,8 +34,6 @@ unsigned char sh7751se_inb(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned char *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned char *)pci_ioaddr(port); else return (*port2adr(port)) & 0xff; } @@ -46,8 +44,6 @@ unsigned char sh7751se_inb_p(unsigned long port) if (PXSEG(port)) v = *(volatile unsigned char *)port; - else if (is_pci_ioaddr(port)) - v = *(volatile unsigned char *)pci_ioaddr(port); else v = (*port2adr(port)) & 0xff; ctrl_delay(); @@ -58,8 +54,6 @@ unsigned short sh7751se_inw(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned short *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned short *)pci_ioaddr(port); else if (port >= 0x2000) return *port2adr(port); else @@ -71,8 +65,6 @@ unsigned int sh7751se_inl(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned long *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned int *)pci_ioaddr(port); else if (port >= 0x2000) return *port2adr(port); else @@ -85,8 +77,6 @@ void sh7751se_outb(unsigned char value, unsigned long port) if (PXSEG(port)) *(volatile unsigned char *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned char*)pci_ioaddr(port)) = value; else *(port2adr(port)) = value; } @@ -95,8 +85,6 @@ void sh7751se_outb_p(unsigned char value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned char *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned char*)pci_ioaddr(port)) = value; else *(port2adr(port)) = value; ctrl_delay(); @@ -106,8 +94,6 @@ void sh7751se_outw(unsigned short value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned short *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned short *)pci_ioaddr(port)) = value; else if (port >= 0x2000) *port2adr(port) = value; else @@ -118,8 +104,6 @@ void sh7751se_outl(unsigned int value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned long *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned long*)pci_ioaddr(port)) = value; else maybebadio(port); } diff --git a/arch/sh/boards/mach-snapgear/io.c b/arch/sh/boards/mach-snapgear/io.c index 0f48242..476650e 100644 --- a/arch/sh/boards/mach-snapgear/io.c +++ b/arch/sh/boards/mach-snapgear/io.c @@ -36,8 +36,6 @@ unsigned char snapgear_inb(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned char *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned char *)pci_ioaddr(port); else return (*port2adr(port)) & 0xff; } @@ -48,8 +46,6 @@ unsigned char snapgear_inb_p(unsigned long port) if (PXSEG(port)) v = *(volatile unsigned char *)port; - else if (is_pci_ioaddr(port)) - v = *(volatile unsigned char *)pci_ioaddr(port); else v = (*port2adr(port))&0xff; ctrl_delay(); @@ -60,8 +56,6 @@ unsigned short snapgear_inw(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned short *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned short *)pci_ioaddr(port); else if (port >= 0x2000) return *port2adr(port); else @@ -73,8 +67,6 @@ unsigned int snapgear_inl(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned long *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned int *)pci_ioaddr(port); else if (port >= 0x2000) return *port2adr(port); else @@ -87,8 +79,6 @@ void snapgear_outb(unsigned char value, unsigned long port) if (PXSEG(port)) *(volatile unsigned char *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned char*)pci_ioaddr(port)) = value; else *(port2adr(port)) = value; } @@ -97,8 +87,6 @@ void snapgear_outb_p(unsigned char value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned char *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned char*)pci_ioaddr(port)) = value; else *(port2adr(port)) = value; ctrl_delay(); @@ -108,8 +96,6 @@ void snapgear_outw(unsigned short value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned short *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned short *)pci_ioaddr(port)) = value; else if (port >= 0x2000) *port2adr(port) = value; else @@ -120,8 +106,6 @@ void snapgear_outl(unsigned int value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned long *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned long*)pci_ioaddr(port)) = value; else maybebadio(port); } diff --git a/arch/sh/boards/mach-systemh/io.c b/arch/sh/boards/mach-systemh/io.c index dec3db0..15577ff 100644 --- a/arch/sh/boards/mach-systemh/io.c +++ b/arch/sh/boards/mach-systemh/io.c @@ -35,8 +35,6 @@ unsigned char sh7751systemh_inb(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned char *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned char *)pci_ioaddr(port); else if (port <= 0x3F1) return *(volatile unsigned char *)ETHER_IOMAP(port); else @@ -49,8 +47,6 @@ unsigned char sh7751systemh_inb_p(unsigned long port) if (PXSEG(port)) v = *(volatile unsigned char *)port; - else if (is_pci_ioaddr(port)) - v = *(volatile unsigned char *)pci_ioaddr(port); else if (port <= 0x3F1) v = *(volatile unsigned char *)ETHER_IOMAP(port); else @@ -63,8 +59,6 @@ unsigned short sh7751systemh_inw(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned short *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned short *)pci_ioaddr(port); else if (port >= 0x2000) return *port2adr(port); else if (port <= 0x3F1) @@ -78,8 +72,6 @@ unsigned int sh7751systemh_inl(unsigned long port) { if (PXSEG(port)) return *(volatile unsigned long *)port; - else if (is_pci_ioaddr(port)) - return *(volatile unsigned int *)pci_ioaddr(port); else if (port >= 0x2000) return *port2adr(port); else if (port <= 0x3F1) @@ -94,8 +86,6 @@ void sh7751systemh_outb(unsigned char value, unsigned long port) if (PXSEG(port)) *(volatile unsigned char *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned char*)pci_ioaddr(port)) = value; else if (port <= 0x3F1) *(volatile unsigned char *)ETHER_IOMAP(port) = value; else @@ -106,8 +96,6 @@ void sh7751systemh_outb_p(unsigned char value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned char *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned char*)pci_ioaddr(port)) = value; else if (port <= 0x3F1) *(volatile unsigned char *)ETHER_IOMAP(port) = value; else @@ -119,8 +107,6 @@ void sh7751systemh_outw(unsigned short value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned short *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned short *)pci_ioaddr(port)) = value; else if (port >= 0x2000) *port2adr(port) = value; else if (port <= 0x3F1) @@ -133,8 +119,6 @@ void sh7751systemh_outl(unsigned int value, unsigned long port) { if (PXSEG(port)) *(volatile unsigned long *)port = value; - else if (is_pci_ioaddr(port)) - *((unsigned long*)pci_ioaddr(port)) = value; else maybebadio(port); } diff --git a/arch/sh/boards/mach-titan/io.c b/arch/sh/boards/mach-titan/io.c index 053b3ed..0130e98 100644 --- a/arch/sh/boards/mach-titan/io.c +++ b/arch/sh/boards/mach-titan/io.c @@ -17,8 +17,6 @@ u8 titan_inb(unsigned long port) { if (PXSEG(port)) return ctrl_inb(port); - else if (is_pci_ioaddr(port)) - return ctrl_inb(pci_ioaddr(port)); return ctrl_inw(port2adr(port)) & 0xff; } @@ -28,8 +26,6 @@ u8 titan_inb_p(unsigned long port) if (PXSEG(port)) v = ctrl_inb(port); - else if (is_pci_ioaddr(port)) - v = ctrl_inb(pci_ioaddr(port)); else v = ctrl_inw(port2adr(port)) & 0xff; ctrl_delay(); @@ -40,8 +36,6 @@ u16 titan_inw(unsigned long port) { if (PXSEG(port)) return ctrl_inw(port); - else if (is_pci_ioaddr(port)) - return ctrl_inw(pci_ioaddr(port)); else if (port >= 0x2000) return ctrl_inw(port2adr(port)); else @@ -53,8 +47,6 @@ u32 titan_inl(unsigned long port) { if (PXSEG(port)) return ctrl_inl(port); - else if (is_pci_ioaddr(port)) - return ctrl_inl(pci_ioaddr(port)); else if (port >= 0x2000) return ctrl_inw(port2adr(port)); else @@ -66,8 +58,6 @@ void titan_outb(u8 value, unsigned long port) { if (PXSEG(port)) ctrl_outb(value, port); - else if (is_pci_ioaddr(port)) - ctrl_outb(value, pci_ioaddr(port)); else ctrl_outw(value, port2adr(port)); } @@ -76,8 +66,6 @@ void titan_outb_p(u8 value, unsigned long port) { if (PXSEG(port)) ctrl_outb(value, port); - else if (is_pci_ioaddr(port)) - ctrl_outb(value, pci_ioaddr(port)); else ctrl_outw(value, port2adr(port)); ctrl_delay(); @@ -87,8 +75,6 @@ void titan_outw(u16 value, unsigned long port) { if (PXSEG(port)) ctrl_outw(value, port); - else if (is_pci_ioaddr(port)) - ctrl_outw(value, pci_ioaddr(port)); else if (port >= 0x2000) ctrl_outw(value, port2adr(port)); else @@ -99,8 +85,6 @@ void titan_outl(u32 value, unsigned long port) { if (PXSEG(port)) ctrl_outl(value, port); - else if (is_pci_ioaddr(port)) - ctrl_outl(value, pci_ioaddr(port)); else maybebadio(port); } @@ -119,8 +103,6 @@ void __iomem *titan_ioport_map(unsigned long port, unsigned int size) { if (PXSEG(port)) return (void __iomem *)port; - else if (is_pci_ioaddr(port)) - return (void __iomem *)pci_ioaddr(port); return (void __iomem *)port2adr(port); } -- cgit v0.10.2 From 0e75148108914062cb46ad3dc8f054a628018df7 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 19:48:48 +0900 Subject: sh: pci: Consolidate pcibios_setup() in pci-lib. This wasn't really being used for anything useful, so just stub it in pci-lib. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c index 5bc81d5..529aa4f 100644 --- a/arch/sh/drivers/pci/ops-dreamcast.c +++ b/arch/sh/drivers/pci/ops-dreamcast.c @@ -154,12 +154,6 @@ static int __init gapspci_init(struct pci_channel *chan) return 0; } -/* Haven't done anything here as yet */ -char * __devinit pcibios_setup(char *str) -{ - return str; -} - struct pci_channel board_pci_channels[] = { { gapspci_init, &gapspci_pci_ops, &gapspci_io_resource, &gapspci_mem_resource, 0, 1 }, diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c index 2a7f7b5..7cc1fcc 100644 --- a/arch/sh/drivers/pci/ops-sh4.c +++ b/arch/sh/drivers/pci/ops-sh4.c @@ -106,30 +106,27 @@ struct pci_ops sh4_pci_ops = { * Not really related to pci_ops, but it's common and not worth shoving * somewhere else for now.. */ -static unsigned int pci_probe = PCI_PROBE_CONF1; - int __init sh4_pci_check_direct(struct pci_channel *chan) { /* * Check if configuration works. */ - if (pci_probe & PCI_PROBE_CONF1) { - unsigned int tmp = pci_read_reg(chan, SH4_PCIPAR); - - pci_write_reg(chan, P1SEG, SH4_PCIPAR); + unsigned int tmp = pci_read_reg(chan, SH4_PCIPAR); - if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) { - pci_write_reg(chan, tmp, SH4_PCIPAR); - printk(KERN_INFO "PCI: Using configuration type 1\n"); - request_region(chan->reg_base + SH4_PCIPAR, 8, - "PCI conf1"); - return 0; - } + pci_write_reg(chan, P1SEG, SH4_PCIPAR); + if (pci_read_reg(chan, SH4_PCIPAR) == P1SEG) { pci_write_reg(chan, tmp, SH4_PCIPAR); + printk(KERN_INFO "PCI: Using configuration type 1\n"); + request_region(chan->reg_base + SH4_PCIPAR, 8, + "PCI conf1"); + return 0; } - pr_debug("PCI: pci_check_direct failed\n"); + pci_write_reg(chan, tmp, SH4_PCIPAR); + + printk(KERN_ERR "PCI: %s failed\n", __func__); + return -EINVAL; } @@ -155,16 +152,6 @@ static void __init pci_fixup_ide_bases(struct pci_dev *d) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases); -char * __devinit pcibios_setup(char *str) -{ - if (!strcmp(str, "off")) { - pci_probe = 0; - return NULL; - } - - return str; -} - int __attribute__((weak)) pci_fixup_pcic(struct pci_channel *chan) { /* Nothing to do. */ diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c index 729e38a..b10e2d5 100644 --- a/arch/sh/drivers/pci/ops-sh5.c +++ b/arch/sh/drivers/pci/ops-sh5.c @@ -42,11 +42,6 @@ static void __init pci_fixup_ide_bases(struct pci_dev *d) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases); -char * __devinit pcibios_setup(char *str) -{ - return str; -} - static int sh5pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c index ea83629..f072aca 100644 --- a/arch/sh/drivers/pci/pci-lib.c +++ b/arch/sh/drivers/pci/pci-lib.c @@ -134,6 +134,11 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); } +char * __devinit pcibios_setup(char *str) +{ + return str; +} + int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine) { -- cgit v0.10.2 From 0db38cea69fc478a5c25b3c915ec680cc5538783 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 19:54:47 +0900 Subject: sh: pci: Kill off legacy ide quirks. These fixups seem to have bitrotted a bit since their introduction in the 2.4 days. As we never had much use for them in the first place, and nothing is using them any more, kill them off the rest of the way. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/ops-sh4.c b/arch/sh/drivers/pci/ops-sh4.c index 7cc1fcc..78bebeb 100644 --- a/arch/sh/drivers/pci/ops-sh4.c +++ b/arch/sh/drivers/pci/ops-sh4.c @@ -130,28 +130,6 @@ int __init sh4_pci_check_direct(struct pci_channel *chan) return -EINVAL; } -/* Handle generic fixups */ -static void __init pci_fixup_ide_bases(struct pci_dev *d) -{ - int i; - - /* - * PCI IDE controllers use non-standard I/O port decoding, respect it. - */ - if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE) - return; - pr_debug("PCI: IDE base address fixup for %s\n", pci_name(d)); - for(i = 0; i < 4; i++) { - struct resource *r = &d->resource[i]; - - if ((r->start & ~0x80) == 0x374) { - r->start |= 2; - r->end = r->start; - } - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases); - int __attribute__((weak)) pci_fixup_pcic(struct pci_channel *chan) { /* Nothing to do. */ diff --git a/arch/sh/drivers/pci/ops-sh5.c b/arch/sh/drivers/pci/ops-sh5.c index b10e2d5..4ce95a0 100644 --- a/arch/sh/drivers/pci/ops-sh5.c +++ b/arch/sh/drivers/pci/ops-sh5.c @@ -22,26 +22,6 @@ #include #include "pci-sh5.h" -static void __init pci_fixup_ide_bases(struct pci_dev *d) -{ - int i; - - /* - * PCI IDE controllers use non-standard I/O port decoding, respect it. - */ - if ((d->class >> 8) != PCI_CLASS_STORAGE_IDE) - return; - printk("PCI: IDE base address fixup for %s\n", pci_name(d)); - for(i=0; i<4; i++) { - struct resource *r = &d->resource[i]; - if ((r->start & ~0x80) == 0x374) { - r->start |= 2; - r->end = r->start; - } - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases); - static int sh5pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { -- cgit v0.10.2 From 3444f5ec49bc6cb901ffea38e085db1d76e1189c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 20:22:05 +0900 Subject: sh: pci: Tidy up the dreamcast PCI support. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index 4cac866..2160a06 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -12,7 +12,8 @@ obj-$(CONFIG_CPU_SUBTYPE_SH7780) += pci-sh7780.o ops-sh4.o obj-$(CONFIG_CPU_SUBTYPE_SH7785) += pci-sh7780.o ops-sh4.o obj-$(CONFIG_CPU_SH5) += pci-sh5.o ops-sh5.o -obj-$(CONFIG_SH_DREAMCAST) += ops-dreamcast.o fixups-dreamcast.o +obj-$(CONFIG_SH_DREAMCAST) += ops-dreamcast.o fixups-dreamcast.o \ + pci-dreamcast.o obj-$(CONFIG_SH_SECUREEDGE5410) += ops-snapgear.o obj-$(CONFIG_SH_RTS7751R2D) += ops-rts7751r2d.o fixups-rts7751r2d.o obj-$(CONFIG_SH_SH03) += ops-sh03.o fixups-sh03.o diff --git a/arch/sh/drivers/pci/ops-dreamcast.c b/arch/sh/drivers/pci/ops-dreamcast.c index 529aa4f..e83d0d3 100644 --- a/arch/sh/drivers/pci/ops-dreamcast.c +++ b/arch/sh/drivers/pci/ops-dreamcast.c @@ -1,15 +1,9 @@ /* - * arch/sh/drivers/pci/ops-dreamcast.c - * * PCI operations for the Sega Dreamcast * * Copyright (C) 2001, 2002 M. R. Brown * Copyright (C) 2002, 2003 Paul Mundt * - * This file originally bore the message (with enclosed-$): - * Id: pci.c,v 1.3 2003/05/04 19:29:46 lethal Exp - * Dreamcast PCI: Supports SEGA Broadband Adaptor only. - * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. @@ -23,25 +17,10 @@ #include #include #include - -#include -#include +#include +#include #include -static struct resource gapspci_io_resource = { - .name = "GAPSPCI IO", - .start = GAPSPCI_BBA_CONFIG, - .end = GAPSPCI_BBA_CONFIG + GAPSPCI_BBA_CONFIG_SIZE - 1, - .flags = IORESOURCE_IO, -}; - -static struct resource gapspci_mem_resource = { - .name = "GAPSPCI mem", - .start = GAPSPCI_DMA_BASE, - .end = GAPSPCI_DMA_BASE + GAPSPCI_DMA_SIZE - 1, - .flags = IORESOURCE_MEM, -}; - /* * The !gapspci_config_access case really shouldn't happen, ever, unless * someone implicitly messes around with the last devfn value.. otherwise we @@ -76,10 +55,10 @@ static int gapspci_read(struct pci_bus *bus, unsigned int devfn, int where, int return PCIBIOS_DEVICE_NOT_FOUND; switch (size) { - case 1: *val = inb(GAPSPCI_BBA_CONFIG+where); break; - case 2: *val = inw(GAPSPCI_BBA_CONFIG+where); break; - case 4: *val = inl(GAPSPCI_BBA_CONFIG+where); break; - } + case 1: *val = inb(GAPSPCI_BBA_CONFIG+where); break; + case 2: *val = inw(GAPSPCI_BBA_CONFIG+where); break; + case 4: *val = inl(GAPSPCI_BBA_CONFIG+where); break; + } return PCIBIOS_SUCCESSFUL; } @@ -90,72 +69,15 @@ static int gapspci_write(struct pci_bus *bus, unsigned int devfn, int where, int return PCIBIOS_DEVICE_NOT_FOUND; switch (size) { - case 1: outb(( u8)val, GAPSPCI_BBA_CONFIG+where); break; - case 2: outw((u16)val, GAPSPCI_BBA_CONFIG+where); break; - case 4: outl((u32)val, GAPSPCI_BBA_CONFIG+where); break; + case 1: outb(( u8)val, GAPSPCI_BBA_CONFIG+where); break; + case 2: outw((u16)val, GAPSPCI_BBA_CONFIG+where); break; + case 4: outl((u32)val, GAPSPCI_BBA_CONFIG+where); break; } return PCIBIOS_SUCCESSFUL; } -static struct pci_ops gapspci_pci_ops = { +struct pci_ops gapspci_pci_ops = { .read = gapspci_read, .write = gapspci_write, }; - -/* - * gapspci init - */ - -static int __init gapspci_init(struct pci_channel *chan) -{ - char idbuf[16]; - int i; - - /* - * FIXME: All of this wants documenting to some degree, - * even some basic register definitions would be nice. - * - * I haven't seen anything this ugly since.. maple. - */ - - for (i=0; i<16; i++) - idbuf[i] = inb(GAPSPCI_REGS+i); - - if (strncmp(idbuf, "GAPSPCI_BRIDGE_2", 16)) - return -ENODEV; - - outl(0x5a14a501, GAPSPCI_REGS+0x18); - - for (i=0; i<1000000; i++) - ; - - if (inl(GAPSPCI_REGS+0x18) != 1) - return -EINVAL; - - outl(0x01000000, GAPSPCI_REGS+0x20); - outl(0x01000000, GAPSPCI_REGS+0x24); - - outl(GAPSPCI_DMA_BASE, GAPSPCI_REGS+0x28); - outl(GAPSPCI_DMA_BASE+GAPSPCI_DMA_SIZE, GAPSPCI_REGS+0x2c); - - outl(1, GAPSPCI_REGS+0x14); - outl(1, GAPSPCI_REGS+0x34); - - /* Setting Broadband Adapter */ - outw(0xf900, GAPSPCI_BBA_CONFIG+0x06); - outl(0x00000000, GAPSPCI_BBA_CONFIG+0x30); - outb(0x00, GAPSPCI_BBA_CONFIG+0x3c); - outb(0xf0, GAPSPCI_BBA_CONFIG+0x0d); - outw(0x0006, GAPSPCI_BBA_CONFIG+0x04); - outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10); - outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14); - - return 0; -} - -struct pci_channel board_pci_channels[] = { - { gapspci_init, &gapspci_pci_ops, &gapspci_io_resource, - &gapspci_mem_resource, 0, 1 }, - { 0, } -}; diff --git a/arch/sh/drivers/pci/pci-dreamcast.c b/arch/sh/drivers/pci/pci-dreamcast.c new file mode 100644 index 0000000..0897be5 --- /dev/null +++ b/arch/sh/drivers/pci/pci-dreamcast.c @@ -0,0 +1,105 @@ +/* + * PCI support for the Sega Dreamcast + * + * Copyright (C) 2001, 2002 M. R. Brown + * Copyright (C) 2002, 2003 Paul Mundt + * + * This file originally bore the message (with enclosed-$): + * Id: pci.c,v 1.3 2003/05/04 19:29:46 lethal Exp + * Dreamcast PCI: Supports SEGA Broadband Adaptor only. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static struct resource gapspci_io_resource = { + .name = "GAPSPCI IO", + .start = GAPSPCI_BBA_CONFIG, + .end = GAPSPCI_BBA_CONFIG + GAPSPCI_BBA_CONFIG_SIZE - 1, + .flags = IORESOURCE_IO, +}; + +static struct resource gapspci_mem_resource = { + .name = "GAPSPCI mem", + .start = GAPSPCI_DMA_BASE, + .end = GAPSPCI_DMA_BASE + GAPSPCI_DMA_SIZE - 1, + .flags = IORESOURCE_MEM, +}; + +/* + * gapspci init + */ + +static int __init gapspci_init(struct pci_channel *chan) +{ + char idbuf[16]; + int i; + + /* + * FIXME: All of this wants documenting to some degree, + * even some basic register definitions would be nice. + * + * I haven't seen anything this ugly since.. maple. + */ + + for (i=0; i<16; i++) + idbuf[i] = inb(GAPSPCI_REGS+i); + + if (strncmp(idbuf, "GAPSPCI_BRIDGE_2", 16)) + return -ENODEV; + + outl(0x5a14a501, GAPSPCI_REGS+0x18); + + for (i=0; i<1000000; i++) + cpu_relax(); + + if (inl(GAPSPCI_REGS+0x18) != 1) + return -EINVAL; + + outl(0x01000000, GAPSPCI_REGS+0x20); + outl(0x01000000, GAPSPCI_REGS+0x24); + + outl(GAPSPCI_DMA_BASE, GAPSPCI_REGS+0x28); + outl(GAPSPCI_DMA_BASE+GAPSPCI_DMA_SIZE, GAPSPCI_REGS+0x2c); + + outl(1, GAPSPCI_REGS+0x14); + outl(1, GAPSPCI_REGS+0x34); + + /* Setting Broadband Adapter */ + outw(0xf900, GAPSPCI_BBA_CONFIG+0x06); + outl(0x00000000, GAPSPCI_BBA_CONFIG+0x30); + outb(0x00, GAPSPCI_BBA_CONFIG+0x3c); + outb(0xf0, GAPSPCI_BBA_CONFIG+0x0d); + outw(0x0006, GAPSPCI_BBA_CONFIG+0x04); + outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10); + outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14); + + return 0; +} + +struct pci_channel board_pci_channels[] = { + { + .init = gapspci_init, + .pci_ops = &gapspci_pci_ops, + .io_resource = &gapspci_io_resource, + .mem_resource = &gapspci_mem_resource, + .first_devfn = 0, + .last_devfn = 1, + }, { + .init = NULL, + } +}; diff --git a/arch/sh/include/mach-dreamcast/mach/pci.h b/arch/sh/include/mach-dreamcast/mach/pci.h index 75fc900..0314d97 100644 --- a/arch/sh/include/mach-dreamcast/mach/pci.h +++ b/arch/sh/include/mach-dreamcast/mach/pci.h @@ -21,5 +21,7 @@ #define GAPSPCI_IRQ HW_EVENT_EXTERNAL +extern struct pci_ops gapspci_pci_ops; + #endif /* __ASM_SH_DREAMCAST_PCI_H */ -- cgit v0.10.2 From 48e4237d96fdcb1237b63bcddb37771f97452eec Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 20:40:48 +0900 Subject: sh: pci: Convert the SH-5 code over to the new interface. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig index efe8cf9..f9fb1d1 100644 --- a/arch/sh/drivers/pci/Kconfig +++ b/arch/sh/drivers/pci/Kconfig @@ -23,7 +23,7 @@ config PCI_NEW bool depends on PCI default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \ - CPU_SUBTYPE_SH7785 + CPU_SUBTYPE_SH7785 || CPU_SH5 # This is also board-specific config PCI_AUTO diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c index cbaaf02..b68b61d 100644 --- a/arch/sh/drivers/pci/ops-cayman.c +++ b/arch/sh/drivers/pci/ops-cayman.c @@ -75,14 +75,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin) return result; } - -struct pci_channel board_pci_channels[] = { - { sh5_pci_init, &sh5_pci_ops, NULL, NULL, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - -int __init pcibios_init_platform(void) -{ - return sh5pci_init(__pa(memory_start), - __pa(memory_end) - __pa(memory_start)); -} diff --git a/arch/sh/drivers/pci/pci-sh5.c b/arch/sh/drivers/pci/pci-sh5.c index 7750da2..cf43185 100644 --- a/arch/sh/drivers/pci/pci-sh5.c +++ b/arch/sh/drivers/pci/pci-sh5.c @@ -27,12 +27,6 @@ unsigned long pcicr_virt; unsigned long PCI_IO_AREA; -int __init sh5_pci_init(struct pci_channel *chan) -{ - pr_debug("PCI: Starting intialization.\n"); - return pcibios_init_platform(); -} - /* Rounds a number UP to the nearest power of two. Used for * sizing the PCI window. */ @@ -95,8 +89,21 @@ static irqreturn_t pcish5_serr_irq(int irq, void *dev_id) return IRQ_NONE; } -int __init sh5pci_init(unsigned long memStart, unsigned long memSize) +static struct resource sh5_io_resource = { /* place holder */ }; +static struct resource sh5_mem_resource = { /* place holder */ }; + +static struct pci_channel sh5pci_controller = { + .pci_ops = &sh5_pci_ops, + .mem_resource = &sh5_mem_resource, + .mem_offset = 0x00000000, + .io_resource = &sh5_io_resource, + .io_offset = 0x00000000, +}; + +static int __init sh5pci_init(void) { + unsigned long memStart = __pa(memory_start); + unsigned long memSize = __pa(memory_end) - memStart; u32 lsr0; u32 uval; @@ -203,35 +210,14 @@ int __init sh5pci_init(unsigned long memStart, unsigned long memSize) SH5PCI_WRITE(AINTM, ~0); SH5PCI_WRITE(PINTM, ~0); - return 0; -} + sh5_io_resource.start = PCI_IO_AREA; + sh5_io_resource.end = PCI_IO_AREA + 0x10000; -#define xPCIBIOS_MIN_IO board_pci_channels->io_resource->start -#define xPCIBIOS_MIN_MEM board_pci_channels->mem_resource->start + sh5_mem_resource.start = memStart; + sh5_mem_resource.end = memStart + memSize; -void __devinit pcibios_fixup_bus(struct pci_bus *bus) -{ - struct pci_dev *dev = bus->self; - int i; - - if (dev) { - for (i= 0; i < 3; i++) { - bus->resource[i] = - &dev->resource[PCI_BRIDGE_RESOURCES+i]; - bus->resource[i]->name = bus->name; - } - bus->resource[0]->flags |= IORESOURCE_IO; - bus->resource[1]->flags |= IORESOURCE_MEM; - - /* For now, propagate host limits to the bus; - * we'll adjust them later. */ - bus->resource[0]->end = 64*1024 - 1 ; - bus->resource[1]->end = xPCIBIOS_MIN_MEM+(256*1024*1024)-1; - bus->resource[0]->start = xPCIBIOS_MIN_IO; - bus->resource[1]->start = xPCIBIOS_MIN_MEM; - - /* Turn off downstream PF memory address range by default */ - bus->resource[2]->start = 1024*1024; - bus->resource[2]->end = bus->resource[2]->start - 1; - } + register_pci_controller(&sh5pci_controller); + + return 0; } +arch_initcall(sh5pci_init); diff --git a/arch/sh/drivers/pci/pci-sh5.h b/arch/sh/drivers/pci/pci-sh5.h index af09f38..f277628 100644 --- a/arch/sh/drivers/pci/pci-sh5.h +++ b/arch/sh/drivers/pci/pci-sh5.h @@ -107,8 +107,4 @@ extern unsigned long pcicr_virt; extern struct pci_ops sh5_pci_ops; -/* arch/sh/drivers/pci/pci-sh5.c */ -int sh5_pci_init(struct pci_channel *chan); -int sh5pci_init(unsigned long memStart, unsigned long memSize); - #endif /* __PCI_SH5_H */ -- cgit v0.10.2 From a5b08047129f214af1899bd9088605c7adc21ed5 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 20:41:45 +0900 Subject: sh: pci: Rename ops-cayman -> fixups-cayman. Now that ops-cayman.c only contains IRQ routing fixups, rename it. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index 2160a06..cb2190c 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -24,4 +24,4 @@ obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o obj-$(CONFIG_SH_TITAN) += ops-titan.o obj-$(CONFIG_SH_LANDISK) += ops-landisk.o obj-$(CONFIG_SH_LBOX_RE2) += ops-lboxre2.o fixups-rts7751r2d.o -obj-$(CONFIG_SH_CAYMAN) += ops-cayman.o +obj-$(CONFIG_SH_CAYMAN) += fixups-cayman.o diff --git a/arch/sh/drivers/pci/fixups-cayman.c b/arch/sh/drivers/pci/fixups-cayman.c new file mode 100644 index 0000000..b68b61d --- /dev/null +++ b/arch/sh/drivers/pci/fixups-cayman.c @@ -0,0 +1,77 @@ +#include +#include +#include +#include +#include +#include "pci-sh5.h" + +int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + int result = -1; + + /* The complication here is that the PCI IRQ lines from the Cayman's 2 + 5V slots get into the CPU via a different path from the IRQ lines + from the 3 3.3V slots. Thus, we have to detect whether the card's + interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling' + at the point where we cross from 5V to 3.3V is not the normal case. + + The added complication is that we don't know that the 5V slots are + always bus 2, because a card containing a PCI-PCI bridge may be + plugged into a 3.3V slot, and this changes the bus numbering. + + Also, the Cayman has an intermediate PCI bus that goes a custom + expansion board header (and to the secondary bridge). This bus has + never been used in practice. + + The 1ary onboard PCI-PCI bridge is device 3 on bus 0 + The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of + the 1ary bridge. + */ + + struct slot_pin { + int slot; + int pin; + } path[4]; + int i=0; + + while (dev->bus->number > 0) { + + slot = path[i].slot = PCI_SLOT(dev->devfn); + pin = path[i].pin = pci_swizzle_interrupt_pin(dev, pin); + dev = dev->bus->self; + i++; + if (i > 3) panic("PCI path to root bus too long!\n"); + } + + slot = PCI_SLOT(dev->devfn); + /* This is the slot on bus 0 through which the device is eventually + reachable. */ + + /* Now work back up. */ + if ((slot < 3) || (i == 0)) { + /* Bus 0 (incl. PCI-PCI bridge itself) : perform the final + swizzle now. */ + result = IRQ_INTA + pci_swizzle_interrupt_pin(dev, pin) - 1; + } else { + i--; + slot = path[i].slot; + pin = path[i].pin; + if (slot > 0) { + panic("PCI expansion bus device found - not handled!\n"); + } else { + if (i > 0) { + /* 5V slots */ + i--; + slot = path[i].slot; + pin = path[i].pin; + /* 'pin' was swizzled earlier wrt slot, don't do it again. */ + result = IRQ_P2INTA + (pin - 1); + } else { + /* IRQ for 2ary PCI-PCI bridge : unused */ + result = -1; + } + } + } + + return result; +} diff --git a/arch/sh/drivers/pci/ops-cayman.c b/arch/sh/drivers/pci/ops-cayman.c deleted file mode 100644 index b68b61d..0000000 --- a/arch/sh/drivers/pci/ops-cayman.c +++ /dev/null @@ -1,77 +0,0 @@ -#include -#include -#include -#include -#include -#include "pci-sh5.h" - -int __init pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin) -{ - int result = -1; - - /* The complication here is that the PCI IRQ lines from the Cayman's 2 - 5V slots get into the CPU via a different path from the IRQ lines - from the 3 3.3V slots. Thus, we have to detect whether the card's - interrupts go via the 5V or 3.3V path, i.e. the 'bridge swizzling' - at the point where we cross from 5V to 3.3V is not the normal case. - - The added complication is that we don't know that the 5V slots are - always bus 2, because a card containing a PCI-PCI bridge may be - plugged into a 3.3V slot, and this changes the bus numbering. - - Also, the Cayman has an intermediate PCI bus that goes a custom - expansion board header (and to the secondary bridge). This bus has - never been used in practice. - - The 1ary onboard PCI-PCI bridge is device 3 on bus 0 - The 2ary onboard PCI-PCI bridge is device 0 on the 2ary bus of - the 1ary bridge. - */ - - struct slot_pin { - int slot; - int pin; - } path[4]; - int i=0; - - while (dev->bus->number > 0) { - - slot = path[i].slot = PCI_SLOT(dev->devfn); - pin = path[i].pin = pci_swizzle_interrupt_pin(dev, pin); - dev = dev->bus->self; - i++; - if (i > 3) panic("PCI path to root bus too long!\n"); - } - - slot = PCI_SLOT(dev->devfn); - /* This is the slot on bus 0 through which the device is eventually - reachable. */ - - /* Now work back up. */ - if ((slot < 3) || (i == 0)) { - /* Bus 0 (incl. PCI-PCI bridge itself) : perform the final - swizzle now. */ - result = IRQ_INTA + pci_swizzle_interrupt_pin(dev, pin) - 1; - } else { - i--; - slot = path[i].slot; - pin = path[i].pin; - if (slot > 0) { - panic("PCI expansion bus device found - not handled!\n"); - } else { - if (i > 0) { - /* 5V slots */ - i--; - slot = path[i].slot; - pin = path[i].pin; - /* 'pin' was swizzled earlier wrt slot, don't do it again. */ - result = IRQ_P2INTA + (pin - 1); - } else { - /* IRQ for 2ary PCI-PCI bridge : unused */ - result = -1; - } - } - } - - return result; -} -- cgit v0.10.2 From 7154b3e80203ee91f9ba7d0a43d3daa05c49d9e9 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Mon, 20 Apr 2009 19:21:35 +0900 Subject: ASoC: TWL4030: Add support Voice DAI Add Voice DAI to support the PCM voice interface of the twl4030 codec. The PCM voice interface can be used with 8-kHz(voice narrowband) or 16-kHz(voice wideband) sampling rates, and 16bits, and mono RX and mono TX or stereo TX. The PCM voice interface has two modes - PCM mode1 : This uses the normal FS polarity and the rising edge of the clock signal. - PCM mode2 : This uses the FS polarity inverted and the falling edge of the clock signal. If the system master clock is not 26MHz or the twl4030 codec mode is not option2, the voice PCM interface is not available. Signed-off-by: Joonyoung Shim Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index a1b76d7..cc2968c 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -1484,6 +1484,144 @@ static int twl4030_set_dai_fmt(struct snd_soc_dai *codec_dai, return 0; } +static int twl4030_voice_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + u8 infreq; + u8 mode; + + /* If the system master clock is not 26MHz, the voice PCM interface is + * not avilable. + */ + infreq = twl4030_read_reg_cache(codec, TWL4030_REG_APLL_CTL) + & TWL4030_APLL_INFREQ; + + if (infreq != TWL4030_APLL_INFREQ_26000KHZ) { + printk(KERN_ERR "TWL4030 voice startup: " + "MCLK is not 26MHz, call set_sysclk() on init\n"); + return -EINVAL; + } + + /* If the codec mode is not option2, the voice PCM interface is not + * avilable. + */ + mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) + & TWL4030_OPT_MODE; + + if (mode != TWL4030_OPTION_2) { + printk(KERN_ERR "TWL4030 voice startup: " + "the codec mode is not option2\n"); + return -EINVAL; + } + + return 0; +} + +static int twl4030_voice_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + u8 old_mode, mode; + + /* bit rate */ + old_mode = twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) + & ~(TWL4030_CODECPDZ); + mode = old_mode; + + switch (params_rate(params)) { + case 8000: + mode &= ~(TWL4030_SEL_16K); + break; + case 16000: + mode |= TWL4030_SEL_16K; + break; + default: + printk(KERN_ERR "TWL4030 voice hw params: unknown rate %d\n", + params_rate(params)); + return -EINVAL; + } + + if (mode != old_mode) { + /* change rate and set CODECPDZ */ + twl4030_codec_enable(codec, 0); + twl4030_write(codec, TWL4030_REG_CODEC_MODE, mode); + twl4030_codec_enable(codec, 1); + } + + return 0; +} + +static int twl4030_voice_set_dai_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u8 infreq; + + switch (freq) { + case 26000000: + infreq = TWL4030_APLL_INFREQ_26000KHZ; + break; + default: + printk(KERN_ERR "TWL4030 voice set sysclk: unknown rate %d\n", + freq); + return -EINVAL; + } + + infreq |= TWL4030_APLL_EN; + twl4030_write(codec, TWL4030_REG_APLL_CTL, infreq); + + return 0; +} + +static int twl4030_voice_set_dai_fmt(struct snd_soc_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u8 old_format, format; + + /* get format */ + old_format = twl4030_read_reg_cache(codec, TWL4030_REG_VOICE_IF); + format = old_format; + + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBS_CFM: + format &= ~(TWL4030_VIF_SLAVE_EN); + break; + case SND_SOC_DAIFMT_CBS_CFS: + format |= TWL4030_VIF_SLAVE_EN; + break; + default: + return -EINVAL; + } + + /* clock inversion */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_IB_NF: + format &= ~(TWL4030_VIF_FORMAT); + break; + case SND_SOC_DAIFMT_NB_IF: + format |= TWL4030_VIF_FORMAT; + break; + default: + return -EINVAL; + } + + if (format != old_format) { + /* change format and set CODECPDZ */ + twl4030_codec_enable(codec, 0); + twl4030_write(codec, TWL4030_REG_VOICE_IF, format); + twl4030_codec_enable(codec, 1); + } + + return 0; +} + #define TWL4030_RATES (SNDRV_PCM_RATE_8000_48000) #define TWL4030_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FORMAT_S24_LE) @@ -1495,7 +1633,15 @@ static struct snd_soc_dai_ops twl4030_dai_ops = { .set_fmt = twl4030_set_dai_fmt, }; -struct snd_soc_dai twl4030_dai = { +static struct snd_soc_dai_ops twl4030_dai_voice_ops = { + .startup = twl4030_voice_startup, + .hw_params = twl4030_voice_hw_params, + .set_sysclk = twl4030_voice_set_dai_sysclk, + .set_fmt = twl4030_voice_set_dai_fmt, +}; + +struct snd_soc_dai twl4030_dai[] = { +{ .name = "twl4030", .playback = { .stream_name = "Playback", @@ -1510,6 +1656,23 @@ struct snd_soc_dai twl4030_dai = { .rates = TWL4030_RATES, .formats = TWL4030_FORMATS,}, .ops = &twl4030_dai_ops, +}, +{ + .name = "twl4030 Voice", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 1, + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_8000 | SNDRV_PCM_RATE_16000, + .formats = SNDRV_PCM_FMTBIT_S16_LE,}, + .ops = &twl4030_dai_voice_ops, +}, }; EXPORT_SYMBOL_GPL(twl4030_dai); @@ -1550,8 +1713,8 @@ static int twl4030_init(struct snd_soc_device *socdev) codec->read = twl4030_read_reg_cache; codec->write = twl4030_write; codec->set_bias_level = twl4030_set_bias_level; - codec->dai = &twl4030_dai; - codec->num_dai = 1; + codec->dai = twl4030_dai; + codec->num_dai = ARRAY_SIZE(twl4030_dai), codec->reg_cache_size = sizeof(twl4030_reg); codec->reg_cache = kmemdup(twl4030_reg, sizeof(twl4030_reg), GFP_KERNEL); @@ -1645,13 +1808,13 @@ EXPORT_SYMBOL_GPL(soc_codec_dev_twl4030); static int __init twl4030_modinit(void) { - return snd_soc_register_dai(&twl4030_dai); + return snd_soc_register_dais(&twl4030_dai[0], ARRAY_SIZE(twl4030_dai)); } module_init(twl4030_modinit); static void __exit twl4030_exit(void) { - snd_soc_unregister_dai(&twl4030_dai); + snd_soc_unregister_dais(&twl4030_dai[0], ARRAY_SIZE(twl4030_dai)); } module_exit(twl4030_exit); diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h index cb63765..981ec60 100644 --- a/sound/soc/codecs/twl4030.h +++ b/sound/soc/codecs/twl4030.h @@ -113,6 +113,8 @@ #define TWL4030_SEL_16K 0x04 #define TWL4030_CODECPDZ 0x02 #define TWL4030_OPT_MODE 0x01 +#define TWL4030_OPTION_1 (1 << 0) +#define TWL4030_OPTION_2 (0 << 0) /* TWL4030_REG_MICBIAS_CTL (0x04) Fields */ @@ -171,6 +173,17 @@ #define TWL4030_CLK256FS_EN 0x02 #define TWL4030_AIF_EN 0x01 +/* VOICE_IF (0x0F) Fields */ + +#define TWL4030_VIF_SLAVE_EN 0x80 +#define TWL4030_VIF_DIN_EN 0x40 +#define TWL4030_VIF_DOUT_EN 0x20 +#define TWL4030_VIF_SWAP 0x10 +#define TWL4030_VIF_FORMAT 0x08 +#define TWL4030_VIF_TRI_EN 0x04 +#define TWL4030_VIF_SUB_EN 0x02 +#define TWL4030_VIF_EN 0x01 + /* EAR_CTL (0x21) */ #define TWL4030_EAR_GAIN 0x30 @@ -236,7 +249,10 @@ #define TWL4030_SMOOTH_ANAVOL_EN 0x02 #define TWL4030_DIGMIC_LR_SWAP_EN 0x01 -extern struct snd_soc_dai twl4030_dai; +#define TWL4030_DAI_HIFI 0 +#define TWL4030_DAI_VOICE 1 + +extern struct snd_soc_dai twl4030_dai[2]; extern struct snd_soc_codec_device soc_codec_dev_twl4030; #endif /* End of __TWL4030_AUDIO_H__ */ diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c index 0c2322d..027e1a4 100644 --- a/sound/soc/omap/omap2evm.c +++ b/sound/soc/omap/omap2evm.c @@ -86,7 +86,7 @@ static struct snd_soc_dai_link omap2evm_dai = { .name = "TWL4030", .stream_name = "TWL4030", .cpu_dai = &omap_mcbsp_dai[0], - .codec_dai = &twl4030_dai, + .codec_dai = &twl4030_dai[TWL4030_DAI_HIFI], .ops = &omap2evm_ops, }; diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c index fd24a4a..6aa428e 100644 --- a/sound/soc/omap/omap3beagle.c +++ b/sound/soc/omap/omap3beagle.c @@ -83,7 +83,7 @@ static struct snd_soc_dai_link omap3beagle_dai = { .name = "TWL4030", .stream_name = "TWL4030", .cpu_dai = &omap_mcbsp_dai[0], - .codec_dai = &twl4030_dai, + .codec_dai = &twl4030_dai[TWL4030_DAI_HIFI], .ops = &omap3beagle_ops, }; diff --git a/sound/soc/omap/omap3pandora.c b/sound/soc/omap/omap3pandora.c index fe282d4..ad219aa 100644 --- a/sound/soc/omap/omap3pandora.c +++ b/sound/soc/omap/omap3pandora.c @@ -228,14 +228,14 @@ static struct snd_soc_dai_link omap3pandora_dai[] = { .name = "PCM1773", .stream_name = "HiFi Out", .cpu_dai = &omap_mcbsp_dai[0], - .codec_dai = &twl4030_dai, + .codec_dai = &twl4030_dai[TWL4030_DAI_HIFI], .ops = &omap3pandora_out_ops, .init = omap3pandora_out_init, }, { .name = "TWL4030", .stream_name = "Line/Mic In", .cpu_dai = &omap_mcbsp_dai[1], - .codec_dai = &twl4030_dai, + .codec_dai = &twl4030_dai[TWL4030_DAI_HIFI], .ops = &omap3pandora_in_ops, .init = omap3pandora_in_init, } diff --git a/sound/soc/omap/overo.c b/sound/soc/omap/overo.c index a72dc4e..ec4f8fd 100644 --- a/sound/soc/omap/overo.c +++ b/sound/soc/omap/overo.c @@ -83,7 +83,7 @@ static struct snd_soc_dai_link overo_dai = { .name = "TWL4030", .stream_name = "TWL4030", .cpu_dai = &omap_mcbsp_dai[0], - .codec_dai = &twl4030_dai, + .codec_dai = &twl4030_dai[TWL4030_DAI_HIFI], .ops = &overo_ops, }; diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c index 10f1c86..1c79741 100644 --- a/sound/soc/omap/sdp3430.c +++ b/sound/soc/omap/sdp3430.c @@ -197,7 +197,7 @@ static struct snd_soc_dai_link sdp3430_dai = { .name = "TWL4030", .stream_name = "TWL4030", .cpu_dai = &omap_mcbsp_dai[0], - .codec_dai = &twl4030_dai, + .codec_dai = &twl4030_dai[TWL4030_DAI_HIFI], .init = sdp3430_twl4030_init, .ops = &sdp3430_ops, }; -- cgit v0.10.2 From 757e3c16f8bafa2a470aebf9b04671c5d4d18f49 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:11:07 +0900 Subject: sh: pci: Rewrite SH7751 PCI support to follow SH7780. This follows the similar sort of scheme that the refactored SH7780 code uses, using a 64MB CS3 mapping to handle the window0 case, and simply discarding window1. This vastly simplifies the code, and allows most of the board-specific setup to go die. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig index f9fb1d1..5aaee3c 100644 --- a/arch/sh/drivers/pci/Kconfig +++ b/arch/sh/drivers/pci/Kconfig @@ -23,7 +23,8 @@ config PCI_NEW bool depends on PCI default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \ - CPU_SUBTYPE_SH7785 || CPU_SH5 + CPU_SUBTYPE_SH7785 || CPU_SH5 || \ + CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R # This is also board-specific config PCI_AUTO diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index cb2190c..b8667de 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_SH_DREAMCAST) += ops-dreamcast.o fixups-dreamcast.o \ pci-dreamcast.o obj-$(CONFIG_SH_SECUREEDGE5410) += ops-snapgear.o obj-$(CONFIG_SH_RTS7751R2D) += ops-rts7751r2d.o fixups-rts7751r2d.o -obj-$(CONFIG_SH_SH03) += ops-sh03.o fixups-sh03.o +obj-$(CONFIG_SH_SH03) += fixups-sh03.o obj-$(CONFIG_SH_HIGHLANDER) += fixups-r7780rp.o obj-$(CONFIG_SH_SH7785LCR) += fixups-r7780rp.o obj-$(CONFIG_SH_SDK7780) += fixups-sdk7780.o diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c index 178b778..bb1a6bb 100644 --- a/arch/sh/drivers/pci/ops-landisk.c +++ b/arch/sh/drivers/pci/ops-landisk.c @@ -15,37 +15,6 @@ #include #include "pci-sh4.h" -static struct resource sh7751_io_resource = { - .name = "SH7751 IO", - .start = SH7751_PCI_IO_BASE, - .end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7751_mem_resource = { - .name = "SH7751 mem", - .start = SH7751_PCI_MEMORY_BASE, - .end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -struct pci_channel board_pci_channels[] = { - { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0x3ff}, - {NULL, NULL, NULL, 0, 0}, -}; - -static struct sh4_pci_address_map sh7751_pci_map = { - .window0 = { - .base = SH7751_CS3_BASE_ADDR, - .size = (64 << 20), /* 64MB */ - }, -}; - -int __init pcibios_init_platform(void) -{ - return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); -} - int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { /* diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c index 91cabd8..6db2c20 100644 --- a/arch/sh/drivers/pci/ops-lboxre2.c +++ b/arch/sh/drivers/pci/ops-lboxre2.c @@ -21,36 +21,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { return lboxre2_irq_tab[slot]; } - -static struct resource sh7751_io_resource = { - .name = "SH7751_IO", - .start = SH7751_PCI_IO_BASE , - .end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7751_mem_resource = { - .name = "SH7751_mem", - .start = SH7751_PCI_MEMORY_BASE, - .end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -extern struct pci_ops sh7751_pci_ops; - -struct pci_channel board_pci_channels[] = { - { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - -static struct sh4_pci_address_map sh7751_pci_map = { - .window0 = { - .base = SH7751_CS3_BASE_ADDR, - .size = 0x04000000, - }, -}; - -int __init pcibios_init_platform(void) -{ - return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); -} diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c index 96b916c..d950b8a 100644 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ b/arch/sh/drivers/pci/ops-rts7751r2d.c @@ -29,37 +29,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { return rts7751r2d_irq_tab[slot]; } - -static struct resource sh7751_io_resource = { - .name = "SH7751_IO", - .start = 0x4000, - .end = SH7751_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7751_mem_resource = { - .name = "SH7751_mem", - .start = SH7751_PCI_MEMORY_BASE, - .end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -extern struct pci_ops sh7751_pci_ops; - -struct pci_channel board_pci_channels[] = { - { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - -static struct sh4_pci_address_map sh7751_pci_map = { - .window0 = { - .base = SH7751_CS3_BASE_ADDR, - .size = 0x04000000, - }, -}; - -int __init pcibios_init_platform(void) -{ - return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); -} - diff --git a/arch/sh/drivers/pci/ops-sh03.c b/arch/sh/drivers/pci/ops-sh03.c deleted file mode 100644 index 0218135..0000000 --- a/arch/sh/drivers/pci/ops-sh03.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * linux/arch/sh/drivers/pci/ops-sh03.c - * - * PCI initialization for the Interface CTP/PCI-SH03 board - */ - -#include -#include -#include -#include -#include -#include -#include "pci-sh7751.h" - -/* - * Description: This function sets up and initializes the pcic, sets - * up the BARS, maps the DRAM into the address space etc, etc. - */ -int __init pcibios_init_platform(void) -{ - __set_io_port_base(SH7751_PCI_IO_BASE); - return 1; -} - -static struct resource sh7751_io_resource = { - .name = "SH03 IO", - .start = SH7751_PCI_IO_BASE, - .end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7751_mem_resource = { - .name = "SH03 mem", - .start = SH7751_PCI_MEMORY_BASE, - .end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -extern struct pci_ops sh4_pci_ops; - -struct pci_channel board_pci_channels[] = { - { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c index b64f2b9..5a39ecc 100644 --- a/arch/sh/drivers/pci/ops-snapgear.c +++ b/arch/sh/drivers/pci/ops-snapgear.c @@ -18,55 +18,6 @@ #include #include "pci-sh4.h" -#define SNAPGEAR_PCI_IO 0x4000 -#define SNAPGEAR_PCI_MEM 0xfd000000 - -/* PCI: default LOCAL memory window sizes (seen from PCI bus) */ -#define SNAPGEAR_LSR0_SIZE (64*(1<<20)) //64MB -#define SNAPGEAR_LSR1_SIZE (64*(1<<20)) //64MB - -static struct resource sh7751_io_resource = { - .name = "SH7751 IO", - .start = SNAPGEAR_PCI_IO, - .end = SNAPGEAR_PCI_IO + (64*1024) - 1, /* 64KiB I/O */ - .flags = IORESOURCE_IO, -}; - -static struct resource sh7751_mem_resource = { - .name = "SH7751 mem", - .start = SNAPGEAR_PCI_MEM, - .end = SNAPGEAR_PCI_MEM + (64*1024*1024) - 1, /* 64MiB mem */ - .flags = IORESOURCE_MEM, -}; - -struct pci_channel board_pci_channels[] = { - { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, - { 0, } -}; - -static struct sh4_pci_address_map sh7751_pci_map = { - .window0 = { - .base = SH7751_CS2_BASE_ADDR, - .size = SNAPGEAR_LSR0_SIZE, - }, - - .window1 = { - .base = SH7751_CS2_BASE_ADDR, - .size = SNAPGEAR_LSR1_SIZE, - }, -}; - -/* - * Initialize the SnapGear PCI interface - * Setup hardware to be Central Funtion - * Copy the BSR regs to the PCI interface - * Setup PCI windows into local RAM - */ -int __init pcibios_init_platform(void) -{ - return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); -} - int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { int irq = -1; diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c index e45bb62..3a79fa8 100644 --- a/arch/sh/drivers/pci/ops-titan.c +++ b/arch/sh/drivers/pci/ops-titan.c @@ -36,39 +36,3 @@ int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) return irq; } - -static struct resource sh7751_io_resource = { - .name = "SH7751_IO", - .start = SH7751_PCI_IO_BASE, - .end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7751_mem_resource = { - .name = "SH7751_mem", - .start = SH7751_PCI_MEMORY_BASE, - .end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -struct pci_channel board_pci_channels[] = { - { sh7751_pci_init, &sh4_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - -static struct sh4_pci_address_map sh7751_pci_map = { - .window0 = { - .base = SH7751_CS2_BASE_ADDR, - .size = SH7751_MEM_REGION_SIZE*2, /* cs2 and cs3 */ - }, - - .window1 = { - .base = SH7751_CS2_BASE_ADDR, - .size = SH7751_MEM_REGION_SIZE*2, - }, -}; - -int __init pcibios_init_platform(void) -{ - return sh7751_pcic_init(&board_pci_channels[0], &sh7751_pci_map); -} diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 4c08fd7..c4fa0bb1 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -1,78 +1,41 @@ /* - * Low-Level PCI Support for the SH7751 + * Low-Level PCI Support for the SH7751 * - * Dustin McIntire (dustin@sensoria.com) - * Derived from arch/i386/kernel/pci-*.c which bore the message: - * (c) 1999--2000 Martin Mares + * Copyright (C) 2003 - 2009 Paul Mundt + * Copyright (C) 2001 Dustin McIntire * - * Ported to the new API by Paul Mundt - * With cleanup by Paul van Gool - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. + * With cleanup by Paul van Gool , 2003. * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. */ -#undef DEBUG - #include #include #include #include -#include +#include #include "pci-sh4.h" #include -#include - -/* - * Initialization. Try all known PCI access methods. Note that we support - * using both PCI BIOS and direct access: in such cases, we use I/O ports - * to access config space. - * - * Note that the platform specific initialization (BSC registers, and memory - * space mapping) will be called via the platform defined function - * pcibios_init_platform(). - */ -int __init sh7751_pci_init(struct pci_channel *chan) -{ - unsigned int id; - int ret; - - pr_debug("PCI: Starting intialization.\n"); - - chan->reg_base = 0xfe200000; - - /* check for SH7751/SH7751R hardware */ - id = pci_read_reg(chan, SH7751_PCICONF0); - if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) && - id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) { - pr_debug("PCI: This is not an SH7751(R) (%x)\n", id); - return -ENODEV; - } - - if ((ret = sh4_pci_check_direct(chan)) != 0) - return ret; - - return pcibios_init_platform(); -} static int __init __area_sdram_check(struct pci_channel *chan, unsigned int area) { - u32 word; + unsigned long word; - word = ctrl_inl(SH7751_BCR1); + word = __raw_readl(SH7751_BCR1); /* check BCR for SDRAM in area */ if (((word >> area) & 1) == 0) { - printk("PCI: Area %d is not configured for SDRAM. BCR1=0x%x\n", + printk("PCI: Area %d is not configured for SDRAM. BCR1=0x%lx\n", area, word); return 0; } pci_write_reg(chan, word, SH4_PCIBCR1); - word = (u16)ctrl_inw(SH7751_BCR2); + word = __raw_readw(SH7751_BCR2); /* check BCR2 for 32bit SDRAM interface*/ if (((word >> (area << 1)) & 0x3) != 0x3) { - printk("PCI: Area %d is not 32 bit SDRAM. BCR2=0x%x\n", + printk("PCI: Area %d is not 32 bit SDRAM. BCR2=0x%lx\n", area, word); return 0; } @@ -81,11 +44,56 @@ static int __init __area_sdram_check(struct pci_channel *chan, return 1; } -int __init sh7751_pcic_init(struct pci_channel *chan, - struct sh4_pci_address_map *map) +static struct resource sh7751_io_resource = { + .name = "SH7751_IO", + .start = SH7751_PCI_IO_BASE, + .end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1, + .flags = IORESOURCE_IO +}; + +static struct resource sh7751_mem_resource = { + .name = "SH7785_mem", + .start = SH7751_PCI_MEMORY_BASE, + .end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1, + .flags = IORESOURCE_MEM +}; + +static struct pci_channel sh7751_pci_controller = { + .pci_ops = &sh4_pci_ops, + .mem_resource = &sh7751_mem_resource, + .mem_offset = 0x00000000, + .io_resource = &sh7751_io_resource, + .io_offset = 0x00000000, +}; + +static struct sh4_pci_address_map sh7751_pci_map = { + .window0 = { + .base = SH7751_CS3_BASE_ADDR, + .size = 0x04000000, + }, +}; + +static int __init sh7751_pci_init(void) { - u32 reg; - u32 word; + struct pci_channel *chan = &sh7751_pci_controller; + unsigned int id; + u32 word, reg; + int ret; + + printk(KERN_NOTICE "PCI: Starting intialization.\n"); + + chan->reg_base = 0xfe200000; + + /* check for SH7751/SH7751R hardware */ + id = pci_read_reg(chan, SH7751_PCICONF0); + if (id != ((SH7751_DEVICE_ID << 16) | SH7751_VENDOR_ID) && + id != ((SH7751R_DEVICE_ID << 16) | SH7751_VENDOR_ID)) { + pr_debug("PCI: This is not an SH7751(R) (%x)\n", id); + return -ENODEV; + } + + if ((ret = sh4_pci_check_direct(chan)) != 0) + return ret; /* Set the BCR's to enable PCI access */ reg = ctrl_inl(SH7751_BCR1); @@ -112,21 +120,13 @@ int __init sh7751_pcic_init(struct pci_channel *chan, /* Set IO and Mem windows to local address * Make PCI and local address the same for easy 1 to 1 mapping - * Window0 = map->window0.size @ non-cached area base = SDRAM - * Window1 = map->window1.size @ cached area base = SDRAM */ - word = map->window0.size - 1; + word = sh7751_pci_map.window0.size - 1; pci_write_reg(chan, word, SH4_PCILSR0); - word = map->window1.size - 1; - pci_write_reg(chan, word, SH4_PCILSR1); /* Set the values on window 0 PCI config registers */ - word = P2SEGADDR(map->window0.base); + word = P2SEGADDR(sh7751_pci_map.window0.base); pci_write_reg(chan, word, SH4_PCILAR0); pci_write_reg(chan, word, SH7751_PCICONF5); - /* Set the values on window 1 PCI config registers */ - word = PHYSADDR(map->window1.base); - pci_write_reg(chan, word, SH4_PCILAR1); - pci_write_reg(chan, word, SH7751_PCICONF6); /* Set the local 16MB PCI memory space window to * the lowest PCI mapped address @@ -144,7 +144,7 @@ int __init sh7751_pcic_init(struct pci_channel *chan, /* Set PCI WCRx, BCRx's, copy from BSC locations */ /* check BCR for SDRAM in specified area */ - switch (map->window0.base) { + switch (sh7751_pci_map.window0.base) { case SH7751_CS0_BASE_ADDR: word = __area_sdram_check(chan, 0); break; case SH7751_CS1_BASE_ADDR: word = __area_sdram_check(chan, 1); break; case SH7751_CS2_BASE_ADDR: word = __area_sdram_check(chan, 2); break; @@ -179,5 +179,10 @@ int __init sh7751_pcic_init(struct pci_channel *chan, word = SH4_PCICR_PREFIX | SH4_PCICR_CFIN | SH4_PCICR_ARBM; pci_write_reg(chan, word, SH4_PCICR); + __set_io_port_base(SH7751_PCI_IO_BASE); + + register_pci_controller(chan); + return 0; } +arch_initcall(sh7751_pci_init); diff --git a/arch/sh/drivers/pci/pci-sh7751.h b/arch/sh/drivers/pci/pci-sh7751.h index c390dd2..4983a4d 100644 --- a/arch/sh/drivers/pci/pci-sh7751.h +++ b/arch/sh/drivers/pci/pci-sh7751.h @@ -57,7 +57,7 @@ #define SH7751_PCICONF2_SCC 0x00FF0000 /* Sub-Class Code */ #define SH7751_PCICONF2_RLPI 0x0000FF00 /* Programming Interface */ #define SH7751_PCICONF2_REV 0x000000FF /* Revision ID */ -#define SH7751_PCICONF3 0xC /* PCI Config Reg 3 */ +#define SH7751_PCICONF3 0xC /* PCI Config Reg 3 */ #define SH7751_PCICONF3_BIST7 0x80000000 /* Bist Supported */ #define SH7751_PCICONF3_BIST6 0x40000000 /* Bist Executing */ #define SH7751_PCICONF3_BIST3_0 0x0F000000 /* Bist Passed */ @@ -72,12 +72,12 @@ #define SH7751_PCICONF5_BASE 0xFFFFFFF0 /* Mem Space Base Addr */ #define SH7751_PCICONF5_LAP 0x00000008 /* Prefetch Enabled */ #define SH7751_PCICONF5_LAT 0x00000006 /* Local Memory type */ - #define SH7751_PCICONF5_ASI 0x00000001 /* Address Space Type */ + #define SH7751_PCICONF5_ASI 0x00000001 /* Address Space Type */ #define SH7751_PCICONF6 0x18 /* PCI Config Reg 6 */ #define SH7751_PCICONF6_BASE 0xFFFFFFF0 /* Mem Space Base Addr */ #define SH7751_PCICONF6_LAP 0x00000008 /* Prefetch Enabled */ #define SH7751_PCICONF6_LAT 0x00000006 /* Local Memory type */ - #define SH7751_PCICONF6_ASI 0x00000001 /* Address Space Type */ + #define SH7751_PCICONF6_ASI 0x00000001 /* Address Space Type */ /* PCICONF7 - PCICONF10 are undefined */ #define SH7751_PCICONF11 0x2C /* PCI Config Reg 11 */ #define SH7751_PCICONF11_SSID 0xFFFF0000 /* Subsystem ID */ @@ -126,11 +126,4 @@ #define SH7751_CS5_BASE_ADDR (SH7751_CS4_BASE_ADDR + SH7751_MEM_REGION_SIZE) #define SH7751_CS6_BASE_ADDR (SH7751_CS5_BASE_ADDR + SH7751_MEM_REGION_SIZE) -struct sh4_pci_address_map; - -/* arch/sh/drivers/pci/pci-sh7751.c */ -int sh7751_pci_init(struct pci_channel *chan); -int sh7751_pcic_init(struct pci_channel *chan, - struct sh4_pci_address_map *map); - #endif /* _PCI_SH7751_H_ */ -- cgit v0.10.2 From 84972ec0c206b73ffb74e5114b574186ce6166b8 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:17:10 +0900 Subject: sh: pci: Rename SH7751 platform ops files to fixups. None of these contain pci_ops, only IRQ routing bits, rename them accordingly. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index b8667de..abd931c 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -14,14 +14,14 @@ obj-$(CONFIG_CPU_SH5) += pci-sh5.o ops-sh5.o obj-$(CONFIG_SH_DREAMCAST) += ops-dreamcast.o fixups-dreamcast.o \ pci-dreamcast.o -obj-$(CONFIG_SH_SECUREEDGE5410) += ops-snapgear.o +obj-$(CONFIG_SH_SECUREEDGE5410) += fixups-snapgear.o obj-$(CONFIG_SH_RTS7751R2D) += ops-rts7751r2d.o fixups-rts7751r2d.o obj-$(CONFIG_SH_SH03) += fixups-sh03.o obj-$(CONFIG_SH_HIGHLANDER) += fixups-r7780rp.o obj-$(CONFIG_SH_SH7785LCR) += fixups-r7780rp.o obj-$(CONFIG_SH_SDK7780) += fixups-sdk7780.o obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o -obj-$(CONFIG_SH_TITAN) += ops-titan.o -obj-$(CONFIG_SH_LANDISK) += ops-landisk.o -obj-$(CONFIG_SH_LBOX_RE2) += ops-lboxre2.o fixups-rts7751r2d.o +obj-$(CONFIG_SH_TITAN) += fixups-titan.o +obj-$(CONFIG_SH_LANDISK) += fixups-landisk.o +obj-$(CONFIG_SH_LBOX_RE2) += fixups-lboxre2.o fixups-rts7751r2d.o obj-$(CONFIG_SH_CAYMAN) += fixups-cayman.o diff --git a/arch/sh/drivers/pci/fixups-landisk.c b/arch/sh/drivers/pci/fixups-landisk.c new file mode 100644 index 0000000..bb1a6bb --- /dev/null +++ b/arch/sh/drivers/pci/fixups-landisk.c @@ -0,0 +1,34 @@ +/* + * arch/sh/drivers/pci/ops-landisk.c + * + * PCI initialization for the I-O DATA Device, Inc. LANDISK board + * + * Copyright (C) 2006 kogiidena + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + */ +#include +#include +#include +#include +#include +#include "pci-sh4.h" + +int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) +{ + /* + * slot0: pin1-4 = irq5,6,7,8 + * slot1: pin1-4 = irq6,7,8,5 + * slot2: pin1-4 = irq7,8,5,6 + * slot3: pin1-4 = irq8,5,6,7 + */ + int irq = ((slot + pin - 1) & 0x3) + 5; + + if ((slot | (pin - 1)) > 0x3) { + printk("PCI: Bad IRQ mapping request for slot %d pin %c\n", + slot, pin - 1 + 'A'); + return -1; + } + return irq; +} diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c new file mode 100644 index 0000000..6db2c20 --- /dev/null +++ b/arch/sh/drivers/pci/fixups-lboxre2.c @@ -0,0 +1,23 @@ +/* + * linux/arch/sh/drivers/pci/ops-lboxre2.c + * + * Copyright (C) 2007 Nobuhiro Iwamatsu + * + * PCI initialization for the NTT COMWARE L-BOX RE2 + */ +#include +#include +#include +#include +#include +#include +#include "pci-sh4.h" + +static char lboxre2_irq_tab[] __initdata = { + IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD, +}; + +int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) +{ + return lboxre2_irq_tab[slot]; +} diff --git a/arch/sh/drivers/pci/fixups-snapgear.c b/arch/sh/drivers/pci/fixups-snapgear.c new file mode 100644 index 0000000..5a39ecc --- /dev/null +++ b/arch/sh/drivers/pci/fixups-snapgear.c @@ -0,0 +1,38 @@ +/* + * arch/sh/drivers/pci/ops-snapgear.c + * + * Author: David McCullough + * + * Ported to new API by Paul Mundt + * + * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * PCI initialization for the SnapGear boards + */ +#include +#include +#include +#include +#include "pci-sh4.h" + +int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) +{ + int irq = -1; + + switch (slot) { + case 8: /* the PCI bridge */ break; + case 11: irq = 8; break; /* USB */ + case 12: irq = 11; break; /* PCMCIA */ + case 13: irq = 5; break; /* eth0 */ + case 14: irq = 8; break; /* eth1 */ + case 15: irq = 11; break; /* safenet (unused) */ + } + + printk("PCI: Mapping SnapGear IRQ for slot %d, pin %c to irq %d\n", + slot, pin - 1 + 'A', irq); + + return irq; +} diff --git a/arch/sh/drivers/pci/fixups-titan.c b/arch/sh/drivers/pci/fixups-titan.c new file mode 100644 index 0000000..3a79fa8 --- /dev/null +++ b/arch/sh/drivers/pci/fixups-titan.c @@ -0,0 +1,38 @@ +/* + * arch/sh/drivers/pci/ops-titan.c + * + * Ported to new API by Paul Mundt + * + * Modified from ops-snapgear.c written by David McCullough + * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * PCI initialization for the Titan boards + */ +#include +#include +#include +#include +#include +#include +#include "pci-sh4.h" + +static char titan_irq_tab[] __initdata = { + TITAN_IRQ_WAN, + TITAN_IRQ_LAN, + TITAN_IRQ_MPCIA, + TITAN_IRQ_MPCIB, + TITAN_IRQ_USB, +}; + +int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) +{ + int irq = titan_irq_tab[slot]; + + printk("PCI: Mapping TITAN IRQ for slot %d, pin %c to irq %d\n", + slot, pin - 1 + 'A', irq); + + return irq; +} diff --git a/arch/sh/drivers/pci/ops-landisk.c b/arch/sh/drivers/pci/ops-landisk.c deleted file mode 100644 index bb1a6bb..0000000 --- a/arch/sh/drivers/pci/ops-landisk.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * arch/sh/drivers/pci/ops-landisk.c - * - * PCI initialization for the I-O DATA Device, Inc. LANDISK board - * - * Copyright (C) 2006 kogiidena - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - */ -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -int pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - /* - * slot0: pin1-4 = irq5,6,7,8 - * slot1: pin1-4 = irq6,7,8,5 - * slot2: pin1-4 = irq7,8,5,6 - * slot3: pin1-4 = irq8,5,6,7 - */ - int irq = ((slot + pin - 1) & 0x3) + 5; - - if ((slot | (pin - 1)) > 0x3) { - printk("PCI: Bad IRQ mapping request for slot %d pin %c\n", - slot, pin - 1 + 'A'); - return -1; - } - return irq; -} diff --git a/arch/sh/drivers/pci/ops-lboxre2.c b/arch/sh/drivers/pci/ops-lboxre2.c deleted file mode 100644 index 6db2c20..0000000 --- a/arch/sh/drivers/pci/ops-lboxre2.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * linux/arch/sh/drivers/pci/ops-lboxre2.c - * - * Copyright (C) 2007 Nobuhiro Iwamatsu - * - * PCI initialization for the NTT COMWARE L-BOX RE2 - */ -#include -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -static char lboxre2_irq_tab[] __initdata = { - IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - return lboxre2_irq_tab[slot]; -} diff --git a/arch/sh/drivers/pci/ops-snapgear.c b/arch/sh/drivers/pci/ops-snapgear.c deleted file mode 100644 index 5a39ecc..0000000 --- a/arch/sh/drivers/pci/ops-snapgear.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * arch/sh/drivers/pci/ops-snapgear.c - * - * Author: David McCullough - * - * Ported to new API by Paul Mundt - * - * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * PCI initialization for the SnapGear boards - */ -#include -#include -#include -#include -#include "pci-sh4.h" - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - int irq = -1; - - switch (slot) { - case 8: /* the PCI bridge */ break; - case 11: irq = 8; break; /* USB */ - case 12: irq = 11; break; /* PCMCIA */ - case 13: irq = 5; break; /* eth0 */ - case 14: irq = 8; break; /* eth1 */ - case 15: irq = 11; break; /* safenet (unused) */ - } - - printk("PCI: Mapping SnapGear IRQ for slot %d, pin %c to irq %d\n", - slot, pin - 1 + 'A', irq); - - return irq; -} diff --git a/arch/sh/drivers/pci/ops-titan.c b/arch/sh/drivers/pci/ops-titan.c deleted file mode 100644 index 3a79fa8..0000000 --- a/arch/sh/drivers/pci/ops-titan.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * arch/sh/drivers/pci/ops-titan.c - * - * Ported to new API by Paul Mundt - * - * Modified from ops-snapgear.c written by David McCullough - * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * PCI initialization for the Titan boards - */ -#include -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -static char titan_irq_tab[] __initdata = { - TITAN_IRQ_WAN, - TITAN_IRQ_LAN, - TITAN_IRQ_MPCIA, - TITAN_IRQ_MPCIB, - TITAN_IRQ_USB, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - int irq = titan_irq_tab[slot]; - - printk("PCI: Mapping TITAN IRQ for slot %d, pin %c to irq %d\n", - slot, pin - 1 + 'A', irq); - - return irq; -} -- cgit v0.10.2 From 37c8ac361efdbae969eff1a603bc39412d3e873f Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:27:15 +0900 Subject: sh: pci: Consolidate lboxre2 and r2d IRQ fixups. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index abd931c..fbebd73 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -15,7 +15,7 @@ obj-$(CONFIG_CPU_SH5) += pci-sh5.o ops-sh5.o obj-$(CONFIG_SH_DREAMCAST) += ops-dreamcast.o fixups-dreamcast.o \ pci-dreamcast.o obj-$(CONFIG_SH_SECUREEDGE5410) += fixups-snapgear.o -obj-$(CONFIG_SH_RTS7751R2D) += ops-rts7751r2d.o fixups-rts7751r2d.o +obj-$(CONFIG_SH_RTS7751R2D) += fixups-rts7751r2d.o obj-$(CONFIG_SH_SH03) += fixups-sh03.o obj-$(CONFIG_SH_HIGHLANDER) += fixups-r7780rp.o obj-$(CONFIG_SH_SH7785LCR) += fixups-r7780rp.o @@ -23,5 +23,5 @@ obj-$(CONFIG_SH_SDK7780) += fixups-sdk7780.o obj-$(CONFIG_SH_7780_SOLUTION_ENGINE) += fixups-sdk7780.o obj-$(CONFIG_SH_TITAN) += fixups-titan.o obj-$(CONFIG_SH_LANDISK) += fixups-landisk.o -obj-$(CONFIG_SH_LBOX_RE2) += fixups-lboxre2.o fixups-rts7751r2d.o +obj-$(CONFIG_SH_LBOX_RE2) += fixups-rts7751r2d.o obj-$(CONFIG_SH_CAYMAN) += fixups-cayman.o diff --git a/arch/sh/drivers/pci/fixups-lboxre2.c b/arch/sh/drivers/pci/fixups-lboxre2.c deleted file mode 100644 index 6db2c20..0000000 --- a/arch/sh/drivers/pci/fixups-lboxre2.c +++ /dev/null @@ -1,23 +0,0 @@ -/* - * linux/arch/sh/drivers/pci/ops-lboxre2.c - * - * Copyright (C) 2007 Nobuhiro Iwamatsu - * - * PCI initialization for the NTT COMWARE L-BOX RE2 - */ -#include -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -static char lboxre2_irq_tab[] __initdata = { - IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - return lboxre2_irq_tab[slot]; -} diff --git a/arch/sh/drivers/pci/fixups-rts7751r2d.c b/arch/sh/drivers/pci/fixups-rts7751r2d.c index 3885233..052b354 100644 --- a/arch/sh/drivers/pci/fixups-rts7751r2d.c +++ b/arch/sh/drivers/pci/fixups-rts7751r2d.c @@ -1,21 +1,44 @@ /* * arch/sh/drivers/pci/fixups-rts7751r2d.c * - * RTS7751R2D PCI fixups + * RTS7751R2D / LBOXRE2 PCI fixups * * Copyright (C) 2003 Lineo uSolutions, Inc. * Copyright (C) 2004 Paul Mundt + * Copyright (C) 2007 Nobuhiro Iwamatsu * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. */ #include +#include +#include #include "pci-sh4.h" +#include #define PCIMCR_MRSET_OFF 0xBFFFFFFF #define PCIMCR_RFSH_OFF 0xFFFFFFFB +static u8 rts7751r2d_irq_tab[] __initdata = { + IRQ_PCI_INTA, + IRQ_PCI_INTB, + IRQ_PCI_INTC, + IRQ_PCI_INTD, +}; + +static char lboxre2_irq_tab[] __initdata = { + IRQ_ETH0, IRQ_ETH1, IRQ_INTA, IRQ_INTD, +}; + +int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) +{ + if (mach_is_lboxre2()) + return lboxre2_irq_tab[slot]; + else + return rts7751r2d_irq_tab[slot]; +} + int pci_fixup_pcic(struct pci_channel *chan) { unsigned long bcr1, mcr; diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c deleted file mode 100644 index d950b8a..0000000 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * linux/arch/sh/drivers/pci/ops-rts7751r2d.c - * - * Author: Ian DaSilva (idasilva@mvista.com) - * - * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * PCI initialization for the Renesas SH7751R RTS7751R2D board - */ -#include -#include -#include -#include -#include -#include -#include "pci-sh4.h" - -static u8 rts7751r2d_irq_tab[] __initdata = { - IRQ_PCI_INTA, - IRQ_PCI_INTB, - IRQ_PCI_INTC, - IRQ_PCI_INTD, -}; - -int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) -{ - return rts7751r2d_irq_tab[slot]; -} -- cgit v0.10.2 From 951a681bda844491de8699b3bdc6c3899cbd4c9f Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:34:36 +0900 Subject: sh: pci: Convert dreamcast to new-style interface. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig index 5aaee3c..1d53496 100644 --- a/arch/sh/drivers/pci/Kconfig +++ b/arch/sh/drivers/pci/Kconfig @@ -20,11 +20,8 @@ config SH_PCIDMA_NONCOHERENT # Temporary config option for transitioning off of PCI_AUTO config PCI_NEW - bool + def_bool y depends on PCI - default y if CPU_SUBTYPE_SH7763 || CPU_SUBTYPE_SH7780 || \ - CPU_SUBTYPE_SH7785 || CPU_SH5 || \ - CPU_SUBTYPE_SH7751 || CPU_SUBTYPE_SH7751R # This is also board-specific config PCI_AUTO diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c index 48c6381..ed7f489 100644 --- a/arch/sh/drivers/pci/fixups-dreamcast.c +++ b/arch/sh/drivers/pci/fixups-dreamcast.c @@ -30,7 +30,7 @@ static void __init gapspci_fixup_resources(struct pci_dev *dev) { - struct pci_channel *p = board_pci_channels; + struct pci_channel *p = dev->sysdata; printk(KERN_NOTICE "PCI: Fixing up device %s\n", pci_name(dev)); diff --git a/arch/sh/drivers/pci/pci-dreamcast.c b/arch/sh/drivers/pci/pci-dreamcast.c index 0897be5..210f9d4 100644 --- a/arch/sh/drivers/pci/pci-dreamcast.c +++ b/arch/sh/drivers/pci/pci-dreamcast.c @@ -21,7 +21,6 @@ #include #include #include - #include #include #include @@ -40,11 +39,19 @@ static struct resource gapspci_mem_resource = { .flags = IORESOURCE_MEM, }; +static struct pci_channel dreamcast_pci_controller = { + .pci_ops = &gapspci_pci_ops, + .io_resource = &gapspci_io_resource, + .io_offset = 0x00000000, + .mem_resource = &gapspci_mem_resource, + .mem_offset = 0x00000000, +}; + /* * gapspci init */ -static int __init gapspci_init(struct pci_channel *chan) +static int __init gapspci_init(void) { char idbuf[16]; int i; @@ -88,18 +95,8 @@ static int __init gapspci_init(struct pci_channel *chan) outl(0x00002001, GAPSPCI_BBA_CONFIG+0x10); outl(0x01000000, GAPSPCI_BBA_CONFIG+0x14); + register_pci_controller(&dreamcast_pci_controller); + return 0; } - -struct pci_channel board_pci_channels[] = { - { - .init = gapspci_init, - .pci_ops = &gapspci_pci_ops, - .io_resource = &gapspci_io_resource, - .mem_resource = &gapspci_mem_resource, - .first_devfn = 0, - .last_devfn = 1, - }, { - .init = NULL, - } -}; +arch_initcall(gapspci_init); -- cgit v0.10.2 From 2d5efc190eb415dbff79ffab4f8ea731ab0288a9 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:42:59 +0900 Subject: sh: pci: Move the se7751 fixups in to arch/sh/drivers/pci/. The se7751 was still doing the PCI fixups in its own board directory, so we move it over to arch/sh/drivers/pci/ with the rest of the board fixups. It has bitrotted significantly over the years, so will still likely need a bit of work to bring back up to date. Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-se/7751/Makefile b/arch/sh/boards/mach-se/7751/Makefile index dbc29f3..e6f4341 100644 --- a/arch/sh/boards/mach-se/7751/Makefile +++ b/arch/sh/boards/mach-se/7751/Makefile @@ -3,5 +3,3 @@ # obj-y := setup.o io.o irq.o - -obj-$(CONFIG_PCI) += pci.o diff --git a/arch/sh/boards/mach-se/7751/pci.c b/arch/sh/boards/mach-se/7751/pci.c deleted file mode 100644 index 9ec64a4..0000000 --- a/arch/sh/boards/mach-se/7751/pci.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * linux/arch/sh/boards/se/7751/pci.c - * - * Author: Ian DaSilva (idasilva@mvista.com) - * - * Highly leveraged from pci-bigsur.c, written by Dustin McIntire. - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * PCI initialization for the Hitachi SH7751 Solution Engine board (MS7751SE01) - */ - -#include -#include -#include -#include -#include - -#include -#include "../../../drivers/pci/pci-sh7751.h" - -#define PCIMCR_MRSET_OFF 0xBFFFFFFF -#define PCIMCR_RFSH_OFF 0xFFFFFFFB - -/* - * Only long word accesses of the PCIC's internal local registers and the - * configuration registers from the CPU is supported. - */ -#define PCIC_WRITE(x,v) writel((v), PCI_REG(x)) -#define PCIC_READ(x) readl(PCI_REG(x)) - -#define xPCIBIOS_MIN_IO board_pci_channels->io_resource->start -#define xPCIBIOS_MIN_MEM board_pci_channels->mem_resource->start - -/* - * Description: This function sets up and initializes the pcic, sets - * up the BARS, maps the DRAM into the address space etc, etc. - */ -int __init pcibios_init_platform(void) -{ - unsigned long bcr1, wcr1, wcr2, wcr3, mcr; - unsigned short bcr2; - - /* - * Initialize the slave bus controller on the pcic. The values used - * here should not be hardcoded, but they should be taken from the bsc - * on the processor, to make this function as generic as possible. - * (i.e. Another sbc may usr different SDRAM timing settings -- in order - * for the pcic to work, its settings need to be exactly the same.) - */ - bcr1 = (*(volatile unsigned long*)(SH7751_BCR1)); - bcr2 = (*(volatile unsigned short*)(SH7751_BCR2)); - wcr1 = (*(volatile unsigned long*)(SH7751_WCR1)); - wcr2 = (*(volatile unsigned long*)(SH7751_WCR2)); - wcr3 = (*(volatile unsigned long*)(SH7751_WCR3)); - mcr = (*(volatile unsigned long*)(SH7751_MCR)); - - bcr1 = bcr1 | 0x00080000; /* Enable Bit 19, BREQEN */ - (*(volatile unsigned long*)(SH7751_BCR1)) = bcr1; - - bcr1 = bcr1 | 0x40080000; /* Enable Bit 19 BREQEN, set PCIC to slave */ - PCIC_WRITE(SH7751_PCIBCR1, bcr1); /* PCIC BCR1 */ - PCIC_WRITE(SH7751_PCIBCR2, bcr2); /* PCIC BCR2 */ - PCIC_WRITE(SH7751_PCIWCR1, wcr1); /* PCIC WCR1 */ - PCIC_WRITE(SH7751_PCIWCR2, wcr2); /* PCIC WCR2 */ - PCIC_WRITE(SH7751_PCIWCR3, wcr3); /* PCIC WCR3 */ - mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF; - PCIC_WRITE(SH7751_PCIMCR, mcr); /* PCIC MCR */ - - - /* Enable all interrupts, so we know what to fix */ - PCIC_WRITE(SH7751_PCIINTM, 0x0000c3ff); - PCIC_WRITE(SH7751_PCIAINTM, 0x0000380f); - - /* Set up standard PCI config registers */ - PCIC_WRITE(SH7751_PCICONF1, 0xF39000C7); /* Bus Master, Mem & I/O access */ - PCIC_WRITE(SH7751_PCICONF2, 0x00000000); /* PCI Class code & Revision ID */ - PCIC_WRITE(SH7751_PCICONF4, 0xab000001); /* PCI I/O address (local regs) */ - PCIC_WRITE(SH7751_PCICONF5, 0x0c000000); /* PCI MEM address (local RAM) */ - PCIC_WRITE(SH7751_PCICONF6, 0xd0000000); /* PCI MEM address (unused) */ - PCIC_WRITE(SH7751_PCICONF11, 0x35051054); /* PCI Subsystem ID & Vendor ID */ - PCIC_WRITE(SH7751_PCILSR0, 0x03f00000); /* MEM (full 64M exposed) */ - PCIC_WRITE(SH7751_PCILSR1, 0x00000000); /* MEM (unused) */ - PCIC_WRITE(SH7751_PCILAR0, 0x0c000000); /* MEM (direct map from PCI) */ - PCIC_WRITE(SH7751_PCILAR1, 0x00000000); /* MEM (unused) */ - - /* Now turn it on... */ - PCIC_WRITE(SH7751_PCICR, 0xa5000001); - - /* - * Set PCIMBR and PCIIOBR here, assuming a single window - * (16M MEM, 256K IO) is enough. If a larger space is - * needed, the readx/writex and inx/outx functions will - * have to do more (e.g. setting registers for each call). - */ - - /* - * Set the MBR so PCI address is one-to-one with window, - * meaning all calls go straight through... use BUG_ON to - * catch erroneous assumption. - */ - BUG_ON(xPCIBIOS_MIN_MEM != SH7751_PCI_MEMORY_BASE); - - PCIC_WRITE(SH7751_PCIMBR, xPCIBIOS_MIN_MEM); - - /* Set IOBR for window containing area specified in pci.h */ - PCIC_WRITE(SH7751_PCIIOBR, (xPCIBIOS_MIN_IO & SH7751_PCIIOBR_MASK)); - - /* All done, may as well say so... */ - printk("SH7751 PCI: Finished initialization of the PCI controller\n"); - - return 1; -} - -int __init pcibios_map_platform_irq(u8 slot, u8 pin) -{ - switch (slot) { - case 0: return 13; - case 1: return 13; /* AMD Ethernet controller */ - case 2: return -1; - case 3: return -1; - case 4: return -1; - default: - printk("PCI: Bad IRQ mapping request for slot %d\n", slot); - return -1; - } -} - -static struct resource sh7751_io_resource = { - .name = "SH7751 IO", - .start = SH7751_PCI_IO_BASE, - .end = SH7751_PCI_IO_BASE + SH7751_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO -}; - -static struct resource sh7751_mem_resource = { - .name = "SH7751 mem", - .start = SH7751_PCI_MEMORY_BASE, - .end = SH7751_PCI_MEMORY_BASE + SH7751_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM -}; - -extern struct pci_ops sh7751_pci_ops; - -struct pci_channel board_pci_channels[] = { - { &sh7751_pci_ops, &sh7751_io_resource, &sh7751_mem_resource, 0, 0xff }, - { NULL, NULL, NULL, 0, 0 }, -}; - diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index fbebd73..e388a70 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_CPU_SH5) += pci-sh5.o ops-sh5.o obj-$(CONFIG_SH_DREAMCAST) += ops-dreamcast.o fixups-dreamcast.o \ pci-dreamcast.o obj-$(CONFIG_SH_SECUREEDGE5410) += fixups-snapgear.o +obj-$(CONFIG_SH_7751_SOLUTION_ENGINE) += fixups-se7751.o obj-$(CONFIG_SH_RTS7751R2D) += fixups-rts7751r2d.o obj-$(CONFIG_SH_SH03) += fixups-sh03.o obj-$(CONFIG_SH_HIGHLANDER) += fixups-r7780rp.o diff --git a/arch/sh/drivers/pci/fixups-se7751.c b/arch/sh/drivers/pci/fixups-se7751.c new file mode 100644 index 0000000..475fa9f --- /dev/null +++ b/arch/sh/drivers/pci/fixups-se7751.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include +#include +#include +#include "pci-sh4.h" + +int __init pcibios_map_platform_irq(u8 slot, u8 pin) +{ + switch (slot) { + case 0: return 13; + case 1: return 13; /* AMD Ethernet controller */ + case 2: return -1; + case 3: return -1; + case 4: return -1; + default: + printk("PCI: Bad IRQ mapping request for slot %d\n", slot); + return -1; + } +} + +#define PCIMCR_MRSET_OFF 0xBFFFFFFF +#define PCIMCR_RFSH_OFF 0xFFFFFFFB + +/* + * Only long word accesses of the PCIC's internal local registers and the + * configuration registers from the CPU is supported. + */ +#define PCIC_WRITE(x,v) writel((v), PCI_REG(x)) +#define PCIC_READ(x) readl(PCI_REG(x)) + +/* + * Description: This function sets up and initializes the pcic, sets + * up the BARS, maps the DRAM into the address space etc, etc. + */ +int pci_fixup_pcic(struct pci_channel *chan) +{ + unsigned long bcr1, wcr1, wcr2, wcr3, mcr; + unsigned short bcr2; + + /* + * Initialize the slave bus controller on the pcic. The values used + * here should not be hardcoded, but they should be taken from the bsc + * on the processor, to make this function as generic as possible. + * (i.e. Another sbc may usr different SDRAM timing settings -- in order + * for the pcic to work, its settings need to be exactly the same.) + */ + bcr1 = (*(volatile unsigned long*)(SH7751_BCR1)); + bcr2 = (*(volatile unsigned short*)(SH7751_BCR2)); + wcr1 = (*(volatile unsigned long*)(SH7751_WCR1)); + wcr2 = (*(volatile unsigned long*)(SH7751_WCR2)); + wcr3 = (*(volatile unsigned long*)(SH7751_WCR3)); + mcr = (*(volatile unsigned long*)(SH7751_MCR)); + + bcr1 = bcr1 | 0x00080000; /* Enable Bit 19, BREQEN */ + (*(volatile unsigned long*)(SH7751_BCR1)) = bcr1; + + bcr1 = bcr1 | 0x40080000; /* Enable Bit 19 BREQEN, set PCIC to slave */ + PCIC_WRITE(SH7751_PCIBCR1, bcr1); /* PCIC BCR1 */ + PCIC_WRITE(SH7751_PCIBCR2, bcr2); /* PCIC BCR2 */ + PCIC_WRITE(SH7751_PCIWCR1, wcr1); /* PCIC WCR1 */ + PCIC_WRITE(SH7751_PCIWCR2, wcr2); /* PCIC WCR2 */ + PCIC_WRITE(SH7751_PCIWCR3, wcr3); /* PCIC WCR3 */ + mcr = (mcr & PCIMCR_MRSET_OFF) & PCIMCR_RFSH_OFF; + PCIC_WRITE(SH7751_PCIMCR, mcr); /* PCIC MCR */ + + + /* Enable all interrupts, so we know what to fix */ + PCIC_WRITE(SH7751_PCIINTM, 0x0000c3ff); + PCIC_WRITE(SH7751_PCIAINTM, 0x0000380f); + + /* Set up standard PCI config registers */ + PCIC_WRITE(SH7751_PCICONF1, 0xF39000C7); /* Bus Master, Mem & I/O access */ + PCIC_WRITE(SH7751_PCICONF2, 0x00000000); /* PCI Class code & Revision ID */ + PCIC_WRITE(SH7751_PCICONF4, 0xab000001); /* PCI I/O address (local regs) */ + PCIC_WRITE(SH7751_PCICONF5, 0x0c000000); /* PCI MEM address (local RAM) */ + PCIC_WRITE(SH7751_PCICONF6, 0xd0000000); /* PCI MEM address (unused) */ + PCIC_WRITE(SH7751_PCICONF11, 0x35051054); /* PCI Subsystem ID & Vendor ID */ + PCIC_WRITE(SH7751_PCILSR0, 0x03f00000); /* MEM (full 64M exposed) */ + PCIC_WRITE(SH7751_PCILSR1, 0x00000000); /* MEM (unused) */ + PCIC_WRITE(SH7751_PCILAR0, 0x0c000000); /* MEM (direct map from PCI) */ + PCIC_WRITE(SH7751_PCILAR1, 0x00000000); /* MEM (unused) */ + + /* Now turn it on... */ + PCIC_WRITE(SH7751_PCICR, 0xa5000001); + + /* + * Set PCIMBR and PCIIOBR here, assuming a single window + * (16M MEM, 256K IO) is enough. If a larger space is + * needed, the readx/writex and inx/outx functions will + * have to do more (e.g. setting registers for each call). + */ + + /* + * Set the MBR so PCI address is one-to-one with window, + * meaning all calls go straight through... use BUG_ON to + * catch erroneous assumption. + */ + BUG_ON(chan->mem_resource->start != SH7751_PCI_MEMORY_BASE); + + PCIC_WRITE(SH7751_PCIMBR, chan->mem_resource->start); + + /* Set IOBR for window containing area specified in pci.h */ + PCIC_WRITE(SH7751_PCIIOBR, (chan->io_resource->start & SH7751_PCIIOBR_MASK)); + + /* All done, may as well say so... */ + printk("SH7751 PCI: Finished initialization of the PCI controller\n"); + + return 1; +} -- cgit v0.10.2 From 805fcc88999162b361ef0b0ce25782ef65f147d7 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:46:42 +0900 Subject: sh: pci: Kill off the last remnants of the now unused pci-auto code. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig index 1d53496..ea903a9 100644 --- a/arch/sh/drivers/pci/Kconfig +++ b/arch/sh/drivers/pci/Kconfig @@ -18,24 +18,6 @@ config SH_PCIDMA_NONCOHERENT bridge integrated with your SH CPU, refer carefully to the chip specs to see if you can say 'N' here. Otherwise, leave it as 'Y'. -# Temporary config option for transitioning off of PCI_AUTO config PCI_NEW def_bool y depends on PCI - -# This is also board-specific -config PCI_AUTO - bool - depends on PCI && !PCI_NEW - default y - -config PCI_AUTO_UPDATE_RESOURCES - bool - depends on PCI_AUTO - default y if !SH_DREAMCAST - help - Selecting this option will cause the PCI auto code to leave your - BAR values alone. Otherwise they will be updated automatically. If - for some reason, you have a board that simply refuses to work - with its resources updated beyond what they are when the device - is powered up, set this to N. Everyone else will want this as Y. diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index e388a70..a8350cc 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -2,7 +2,6 @@ # Makefile for the PCI specific kernel interface routines under Linux. # obj-y += pci-lib.o -obj-$(CONFIG_PCI_AUTO) += pci.o pci-auto.o obj-$(CONFIG_PCI_NEW) += pci-new.o obj-$(CONFIG_CPU_SUBTYPE_SH7751) += pci-sh7751.o ops-sh4.o diff --git a/arch/sh/drivers/pci/pci-auto.c b/arch/sh/drivers/pci/pci-auto.c deleted file mode 100644 index 1d715ec..0000000 --- a/arch/sh/drivers/pci/pci-auto.c +++ /dev/null @@ -1,546 +0,0 @@ -/* - * PCI autoconfiguration library - * - * Author: Matt Porter - * - * Copyright 2000, 2001 MontaVista Software Inc. - * Copyright 2001 Bradley D. LaRonde - * Copyright 2003 Paul Mundt - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -/* - * Modified for MIPS by Jun Sun, jsun@mvista.com - * - * . Simplify the interface between pci_auto and the rest: a single function. - * . Assign resources from low address to upper address. - * . change most int to u32. - * - * Further modified to include it as mips generic code, ppopov@mvista.com. - * - * 2001-10-26 Bradley D. LaRonde - * - Add a top_bus argument to the "early config" functions so that - * they can set a fake parent bus pointer to convince the underlying - * pci ops to use type 1 configuration for sub busses. - * - Set bridge base and limit registers correctly. - * - Align io and memory base properly before and after bridge setup. - * - Don't fall through to pci_setup_bars for bridge. - * - Reformat the debug output to look more like lspci's output. - * - * Cloned for SuperH by M. R. Brown, mrbrown@0xd6.org - * - * 2003-08-05 Paul Mundt - * - Don't update the BAR values on systems that already have valid addresses - * and don't want these updated for whatever reason, by way of a new config - * option check. However, we still read in the old BAR values so that they - * can still be reported through the debug output. - */ - -#include -#include -#include -#include - -#define DEBUG -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -/* - * These functions are used early on before PCI scanning is done - * and all of the pci_dev and pci_bus structures have been created. - */ -static struct pci_dev *fake_pci_dev(struct pci_channel *hose, - int top_bus, int busnr, int devfn) -{ - static struct pci_dev dev; - static struct pci_bus bus; - - dev.bus = &bus; - dev.sysdata = hose; - dev.devfn = devfn; - bus.number = busnr; - bus.ops = hose->pci_ops; - bus.sysdata = hose; - - if(busnr != top_bus) - /* Fake a parent bus structure. */ - bus.parent = &bus; - else - bus.parent = NULL; - - return &dev; -} - -#define EARLY_PCI_OP(rw, size, type) \ -static int early_##rw##_config_##size(struct pci_channel *hose, \ - int top_bus, int bus, int devfn, int offset, type value) \ -{ \ - return pci_##rw##_config_##size( \ - fake_pci_dev(hose, top_bus, bus, devfn), \ - offset, value); \ -} - -EARLY_PCI_OP(read, byte, u8 *) -EARLY_PCI_OP(read, word, u16 *) -EARLY_PCI_OP(read, dword, u32 *) -EARLY_PCI_OP(write, byte, u8) -EARLY_PCI_OP(write, word, u16) -EARLY_PCI_OP(write, dword, u32) - -static struct resource *io_resource_inuse; -static struct resource *mem_resource_inuse; - -static u32 pciauto_lower_iospc; -static u32 pciauto_upper_iospc; - -static u32 pciauto_lower_memspc; -static u32 pciauto_upper_memspc; - -static void __init -pciauto_setup_bars(struct pci_channel *hose, - int top_bus, - int current_bus, - int pci_devfn, - int bar_limit) -{ - u32 bar_response, bar_size, bar_value; - u32 bar, addr_mask, bar_nr = 0; - u32 * upper_limit; - u32 * lower_limit; - int found_mem64 = 0; - - for (bar = PCI_BASE_ADDRESS_0; bar <= bar_limit; bar+=4) { - u32 bar_addr; - - /* Read the old BAR value */ - early_read_config_dword(hose, top_bus, - current_bus, - pci_devfn, - bar, - &bar_addr); - - /* Tickle the BAR and get the response */ - early_write_config_dword(hose, top_bus, - current_bus, - pci_devfn, - bar, - 0xffffffff); - - early_read_config_dword(hose, top_bus, - current_bus, - pci_devfn, - bar, - &bar_response); - - /* - * Write the old BAR value back out, only update the BAR - * if we implicitly want resources to be updated, which - * is done by the generic code further down. -- PFM. - */ - early_write_config_dword(hose, top_bus, - current_bus, - pci_devfn, - bar, - bar_addr); - - /* If BAR is not implemented go to the next BAR */ - if (!bar_response) - continue; - - /* - * Workaround for a BAR that doesn't use its upper word, - * like the ALi 1535D+ PCI DC-97 Controller Modem (M5457). - * bdl - */ - if (!(bar_response & 0xffff0000)) - bar_response |= 0xffff0000; - -retry: - /* Check the BAR type and set our address mask */ - if (bar_response & PCI_BASE_ADDRESS_SPACE) { - addr_mask = PCI_BASE_ADDRESS_IO_MASK; - upper_limit = &pciauto_upper_iospc; - lower_limit = &pciauto_lower_iospc; - DBG(" I/O"); - } else { - if ((bar_response & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == - PCI_BASE_ADDRESS_MEM_TYPE_64) - found_mem64 = 1; - - addr_mask = PCI_BASE_ADDRESS_MEM_MASK; - upper_limit = &pciauto_upper_memspc; - lower_limit = &pciauto_lower_memspc; - DBG(" Mem"); - } - - - /* Calculate requested size */ - bar_size = ~(bar_response & addr_mask) + 1; - - /* Allocate a base address */ - bar_value = ((*lower_limit - 1) & ~(bar_size - 1)) + bar_size; - - if ((bar_value + bar_size) > *upper_limit) { - if (bar_response & PCI_BASE_ADDRESS_SPACE) { - if (io_resource_inuse->child) { - io_resource_inuse = - io_resource_inuse->child; - pciauto_lower_iospc = - io_resource_inuse->start; - pciauto_upper_iospc = - io_resource_inuse->end + 1; - goto retry; - } - - } else { - if (mem_resource_inuse->child) { - mem_resource_inuse = - mem_resource_inuse->child; - pciauto_lower_memspc = - mem_resource_inuse->start; - pciauto_upper_memspc = - mem_resource_inuse->end + 1; - goto retry; - } - } - DBG(" unavailable -- skipping, value %x size %x\n", - bar_value, bar_size); - continue; - } - - if (bar_value < *lower_limit || (bar_value + bar_size) >= *upper_limit) { - DBG(" unavailable -- skipping, value %x size %x\n", - bar_value, bar_size); - continue; - } - -#ifdef CONFIG_PCI_AUTO_UPDATE_RESOURCES - /* Write it out and update our limit */ - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - bar, bar_value); -#endif - - *lower_limit = bar_value + bar_size; - - /* - * If we are a 64-bit decoder then increment to the - * upper 32 bits of the bar and force it to locate - * in the lower 4GB of memory. - */ - if (found_mem64) { - bar += 4; - early_write_config_dword(hose, top_bus, - current_bus, - pci_devfn, - bar, - 0x00000000); - } - - DBG(" at 0x%.8x [size=0x%x]\n", bar_value, bar_size); - - bar_nr++; - } - -} - -static void __init -pciauto_prescan_setup_bridge(struct pci_channel *hose, - int top_bus, - int current_bus, - int pci_devfn, - int sub_bus) -{ - /* Configure bus number registers */ - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_PRIMARY_BUS, current_bus); - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_SECONDARY_BUS, sub_bus + 1); - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_SUBORDINATE_BUS, 0xff); - - /* Align memory and I/O to 1MB and 4KB boundaries. */ - pciauto_lower_memspc = (pciauto_lower_memspc + (0x100000 - 1)) - & ~(0x100000 - 1); - pciauto_lower_iospc = (pciauto_lower_iospc + (0x1000 - 1)) - & ~(0x1000 - 1); - - /* Set base (lower limit) of address range behind bridge. */ - early_write_config_word(hose, top_bus, current_bus, pci_devfn, - PCI_MEMORY_BASE, pciauto_lower_memspc >> 16); - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_IO_BASE, (pciauto_lower_iospc & 0x0000f000) >> 8); - early_write_config_word(hose, top_bus, current_bus, pci_devfn, - PCI_IO_BASE_UPPER16, pciauto_lower_iospc >> 16); - - /* We don't support prefetchable memory for now, so disable */ - early_write_config_word(hose, top_bus, current_bus, pci_devfn, - PCI_PREF_MEMORY_BASE, 0); - early_write_config_word(hose, top_bus, current_bus, pci_devfn, - PCI_PREF_MEMORY_LIMIT, 0); -} - -static void __init -pciauto_postscan_setup_bridge(struct pci_channel *hose, - int top_bus, - int current_bus, - int pci_devfn, - int sub_bus) -{ - u32 temp; - - /* - * [jsun] we always bump up baselines a little, so that if there - * nothing behind P2P bridge, we don't wind up overlapping IO/MEM - * spaces. - */ - pciauto_lower_memspc += 1; - pciauto_lower_iospc += 1; - - /* Configure bus number registers */ - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_SUBORDINATE_BUS, sub_bus); - - /* Set upper limit of address range behind bridge. */ - early_write_config_word(hose, top_bus, current_bus, pci_devfn, - PCI_MEMORY_LIMIT, pciauto_lower_memspc >> 16); - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_IO_LIMIT, (pciauto_lower_iospc & 0x0000f000) >> 8); - early_write_config_word(hose, top_bus, current_bus, pci_devfn, - PCI_IO_LIMIT_UPPER16, pciauto_lower_iospc >> 16); - - /* Align memory and I/O to 1MB and 4KB boundaries. */ - pciauto_lower_memspc = (pciauto_lower_memspc + (0x100000 - 1)) - & ~(0x100000 - 1); - pciauto_lower_iospc = (pciauto_lower_iospc + (0x1000 - 1)) - & ~(0x1000 - 1); - - /* Enable memory and I/O accesses, enable bus master */ - early_read_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_COMMAND, &temp); - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_COMMAND, temp | PCI_COMMAND_IO | PCI_COMMAND_MEMORY - | PCI_COMMAND_MASTER); -} - -static void __init -pciauto_prescan_setup_cardbus_bridge(struct pci_channel *hose, - int top_bus, - int current_bus, - int pci_devfn, - int sub_bus) -{ - /* Configure bus number registers */ - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_PRIMARY_BUS, current_bus); - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_SECONDARY_BUS, sub_bus + 1); - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_SUBORDINATE_BUS, 0xff); - - /* Align memory and I/O to 4KB and 4 byte boundaries. */ - pciauto_lower_memspc = (pciauto_lower_memspc + (0x1000 - 1)) - & ~(0x1000 - 1); - pciauto_lower_iospc = (pciauto_lower_iospc + (0x4 - 1)) - & ~(0x4 - 1); - - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_CB_MEMORY_BASE_0, pciauto_lower_memspc); - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_CB_IO_BASE_0, pciauto_lower_iospc); -} - -static void __init -pciauto_postscan_setup_cardbus_bridge(struct pci_channel *hose, - int top_bus, - int current_bus, - int pci_devfn, - int sub_bus) -{ - u32 temp; - - /* - * [jsun] we always bump up baselines a little, so that if there - * nothing behind P2P bridge, we don't wind up overlapping IO/MEM - * spaces. - */ - pciauto_lower_memspc += 1; - pciauto_lower_iospc += 1; - - /* - * Configure subordinate bus number. The PCI subsystem - * bus scan will renumber buses (reserving three additional - * for this PCI<->CardBus bridge for the case where a CardBus - * adapter contains a P2P or CB2CB bridge. - */ - - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_SUBORDINATE_BUS, sub_bus); - - /* - * Reserve an additional 4MB for mem space and 16KB for - * I/O space. This should cover any additional space - * requirement of unusual CardBus devices with - * additional bridges that can consume more address space. - * - * Although pcmcia-cs currently will reprogram bridge - * windows, the goal is to add an option to leave them - * alone and use the bridge window ranges as the regions - * that are searched for free resources upon hot-insertion - * of a device. This will allow a PCI<->CardBus bridge - * configured by this routine to happily live behind a - * P2P bridge in a system. - */ - /* Align memory and I/O to 4KB and 4 byte boundaries. */ - pciauto_lower_memspc = (pciauto_lower_memspc + (0x1000 - 1)) - & ~(0x1000 - 1); - pciauto_lower_iospc = (pciauto_lower_iospc + (0x4 - 1)) - & ~(0x4 - 1); - /* Set up memory and I/O filter limits, assume 32-bit I/O space */ - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_CB_MEMORY_LIMIT_0, pciauto_lower_memspc - 1); - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_CB_IO_LIMIT_0, pciauto_lower_iospc - 1); - - /* Enable memory and I/O accesses, enable bus master */ - early_read_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_COMMAND, &temp); - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_COMMAND, temp | PCI_COMMAND_IO | PCI_COMMAND_MEMORY | - PCI_COMMAND_MASTER); -} - -#define PCIAUTO_IDE_MODE_MASK 0x05 - -static int __init -pciauto_bus_scan(struct pci_channel *hose, int top_bus, int current_bus) -{ - int sub_bus; - u32 pci_devfn, pci_class, cmdstat, found_multi=0; - unsigned short vid, did; - unsigned char header_type; - int devfn_start = 0; - int devfn_stop = 0xff; - - sub_bus = current_bus; - - if (hose->first_devfn) - devfn_start = hose->first_devfn; - if (hose->last_devfn) - devfn_stop = hose->last_devfn; - - for (pci_devfn=devfn_start; pci_devfn> 16, vid, did); - if (pci_class & 0xff) - DBG(" (rev %.2x)", pci_class & 0xff); - DBG("\n"); - - if ((pci_class >> 16) == PCI_CLASS_BRIDGE_PCI) { - DBG(" Bridge: primary=%.2x, secondary=%.2x\n", - current_bus, sub_bus + 1); - pciauto_prescan_setup_bridge(hose, top_bus, current_bus, - pci_devfn, sub_bus); - DBG("Scanning sub bus %.2x, I/O 0x%.8x, Mem 0x%.8x\n", - sub_bus + 1, - pciauto_lower_iospc, pciauto_lower_memspc); - sub_bus = pciauto_bus_scan(hose, top_bus, sub_bus+1); - DBG("Back to bus %.2x\n", current_bus); - pciauto_postscan_setup_bridge(hose, top_bus, current_bus, - pci_devfn, sub_bus); - continue; - } else if ((pci_class >> 16) == PCI_CLASS_BRIDGE_CARDBUS) { - DBG(" CARDBUS Bridge: primary=%.2x, secondary=%.2x\n", - current_bus, sub_bus + 1); - DBG("PCI Autoconfig: Found CardBus bridge, device %d function %d\n", PCI_SLOT(pci_devfn), PCI_FUNC(pci_devfn)); - /* Place CardBus Socket/ExCA registers */ - pciauto_setup_bars(hose, top_bus, current_bus, pci_devfn, PCI_BASE_ADDRESS_0); - - pciauto_prescan_setup_cardbus_bridge(hose, top_bus, - current_bus, pci_devfn, sub_bus); - - DBG("Scanning sub bus %.2x, I/O 0x%.8x, Mem 0x%.8x\n", - sub_bus + 1, - pciauto_lower_iospc, pciauto_lower_memspc); - sub_bus = pciauto_bus_scan(hose, top_bus, sub_bus+1); - DBG("Back to bus %.2x, sub_bus is %x\n", current_bus, sub_bus); - pciauto_postscan_setup_cardbus_bridge(hose, top_bus, - current_bus, pci_devfn, sub_bus); - continue; - } else if ((pci_class >> 16) == PCI_CLASS_STORAGE_IDE) { - - unsigned char prg_iface; - - early_read_config_byte(hose, top_bus, current_bus, - pci_devfn, PCI_CLASS_PROG, &prg_iface); - if (!(prg_iface & PCIAUTO_IDE_MODE_MASK)) { - DBG("Skipping legacy mode IDE controller\n"); - continue; - } - } - - /* - * Found a peripheral, enable some standard - * settings - */ - early_read_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_COMMAND, &cmdstat); - early_write_config_dword(hose, top_bus, current_bus, pci_devfn, - PCI_COMMAND, cmdstat | PCI_COMMAND_IO | - PCI_COMMAND_MEMORY | - PCI_COMMAND_MASTER); - early_write_config_byte(hose, top_bus, current_bus, pci_devfn, - PCI_LATENCY_TIMER, 0x80); - - /* Allocate PCI I/O and/or memory space */ - pciauto_setup_bars(hose, top_bus, current_bus, pci_devfn, PCI_BASE_ADDRESS_5); - } - return sub_bus; -} - -int __init -pciauto_assign_resources(int busno, struct pci_channel *hose) -{ - /* setup resource limits */ - io_resource_inuse = hose->io_resource; - mem_resource_inuse = hose->mem_resource; - - pciauto_lower_iospc = io_resource_inuse->start; - pciauto_upper_iospc = io_resource_inuse->end + 1; - pciauto_lower_memspc = mem_resource_inuse->start; - pciauto_upper_memspc = mem_resource_inuse->end + 1; - DBG("Autoconfig PCI channel 0x%p\n", hose); - DBG("Scanning bus %.2x, I/O 0x%.8x:0x%.8x, Mem 0x%.8x:0x%.8x\n", - busno, pciauto_lower_iospc, pciauto_upper_iospc, - pciauto_lower_memspc, pciauto_upper_memspc); - - return pciauto_bus_scan(hose, busno, busno); -} diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c deleted file mode 100644 index 8c332b2..0000000 --- a/arch/sh/drivers/pci/pci.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * arch/sh/drivers/pci/pci.c - * - * Copyright (c) 2002 M. R. Brown - * Copyright (c) 2004 - 2006 Paul Mundt - * - * These functions are collected here to reduce duplication of common - * code amongst the many platform-specific PCI support code files. - * - * These routines require the following board-specific routines: - * void pcibios_fixup_irqs(); - * - * See include/asm-sh/pci.h for more information. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include -#include -#include -#include - -static int __init pcibios_init(void) -{ - struct pci_channel *p; - struct pci_bus *bus; - int busno; - - /* init channels */ - busno = 0; - for (p = board_pci_channels; p->init; p++) { - if (p->init(p) == 0) - p->enabled = 1; - else - pr_err("Unable to init pci channel %d\n", busno); - busno++; - } - -#ifdef CONFIG_PCI_AUTO - /* assign resources */ - busno = 0; - for (p = board_pci_channels; p->init; p++) - if (p->enabled) - busno = pciauto_assign_resources(busno, p) + 1; -#endif - - /* scan the buses */ - busno = 0; - for (p = board_pci_channels; p->init; p++) { - if (p->enabled) { - bus = pci_scan_bus(busno, p->pci_ops, p); - busno = bus->subordinate + 1; - } - } - - pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); - - dma_debug_add_bus(&pci_bus_type); - - return 0; -} -subsys_initcall(pcibios_init); - -/* - * Called after each bus is probed, but before its children - * are examined. - */ -void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) -{ - pci_read_bridge_bases(bus); -} - -EXPORT_SYMBOL(board_pci_channels); diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index f36c789..f9101215 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -19,8 +19,6 @@ struct pci_channel { struct pci_channel *next; - int (*init)(struct pci_channel *chan); - struct pci_ops *pci_ops; struct resource *io_resource; struct resource *mem_resource; @@ -28,20 +26,11 @@ struct pci_channel { unsigned long io_offset; unsigned long mem_offset; - int first_devfn; - int last_devfn; - int enabled; - unsigned long reg_base; unsigned long io_map_base; }; -/* - * Each board initializes this array and terminates it with a NULL entry. - */ -extern struct pci_channel board_pci_channels[]; - extern void register_pci_controller(struct pci_channel *hose); extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM; @@ -122,13 +111,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, #endif /* Board-specific fixup routines. */ -int pcibios_init_platform(void); int pcibios_map_platform_irq(struct pci_dev *dev, u8 slot, u8 pin); -#ifdef CONFIG_PCI_AUTO -int pciauto_assign_resources(int busno, struct pci_channel *hose); -#endif - extern void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, struct resource *res); -- cgit v0.10.2 From 35bcfffd86aac933205fbf8401e3e7e4dde68264 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:51:19 +0900 Subject: sh: pci: Roll pci-lib in to pci-new. Now that the pci-auto cruft is gone, pci-lib can go away. Roll it back in to pci-new.c where it originally split off from. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index a8350cc..eacac7f 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -1,7 +1,6 @@ # # Makefile for the PCI specific kernel interface routines under Linux. # -obj-y += pci-lib.o obj-$(CONFIG_PCI_NEW) += pci-new.o obj-$(CONFIG_CPU_SUBTYPE_SH7751) += pci-sh7751.o ops-sh4.o diff --git a/arch/sh/drivers/pci/pci-lib.c b/arch/sh/drivers/pci/pci-lib.c deleted file mode 100644 index f072aca..0000000 --- a/arch/sh/drivers/pci/pci-lib.c +++ /dev/null @@ -1,219 +0,0 @@ -#include -#include -#include -#include -#include - -unsigned long PCIBIOS_MIN_IO = 0x0000; -unsigned long PCIBIOS_MIN_MEM = 0; - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - */ -void pcibios_align_resource(void *data, struct resource *res, - resource_size_t size, resource_size_t align) -{ - struct pci_dev *dev = data; - struct pci_channel *chan = dev->sysdata; - resource_size_t start = res->start; - - if (res->flags & IORESOURCE_IO) { - if (start < PCIBIOS_MIN_IO + chan->io_resource->start) - start = PCIBIOS_MIN_IO + chan->io_resource->start; - - /* - * Put everything into 0x00-0xff region modulo 0x400. - */ - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } else if (res->flags & IORESOURCE_MEM) { - if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start) - start = PCIBIOS_MIN_MEM + chan->mem_resource->start; - } - - res->start = start; -} - -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, - struct resource *res) -{ - struct pci_channel *hose = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (res->flags & IORESOURCE_MEM) - offset = hose->mem_offset; - - region->start = res->start - offset; - region->end = res->end - offset; -} - -void __devinit -pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, - struct pci_bus_region *region) -{ - struct pci_channel *hose = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (res->flags & IORESOURCE_MEM) - offset = hose->mem_offset; - - res->start = region->start + offset; - res->end = region->end + offset; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx=0; idx < PCI_NUM_RESOURCES; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1<resource[idx]; - if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available " - "because of resource collisions\n", - pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check and set - * the latency timer as it may not be properly set. - */ -static unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - -char * __devinit pcibios_setup(char *str) -{ - return str; -} - -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - /* - * I/O space can be accessed via normal processor loads and stores on - * this platform but for now we elect not to do this and portable - * drivers should not do this anyway. - */ - if (mmap_state == pci_mmap_io) - return -EINVAL; - - /* - * Ignore write-combine; for now only return uncached mappings. - */ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - -static void __iomem *ioport_map_pci(struct pci_dev *dev, - unsigned long port, unsigned int nr) -{ - struct pci_channel *chan = dev->sysdata; - - if (!chan->io_map_base) - chan->io_map_base = generic_io_base; - - return (void __iomem *)(chan->io_map_base + port); -} - -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (unlikely(!len || !start)) - return NULL; - if (maxlen && len > maxlen) - len = maxlen; - - if (flags & IORESOURCE_IO) - return ioport_map_pci(dev, start, len); - - /* - * Presently the IORESOURCE_MEM case is a bit special, most - * SH7751 style PCI controllers have PCI memory at a fixed - * location in the address space where no remapping is desired. - * With the IORESOURCE_MEM case more care has to be taken - * to inhibit page table mapping for legacy cores, but this is - * punted off to __ioremap(). - * -- PFM. - */ - if (flags & IORESOURCE_MEM) { - if (flags & IORESOURCE_CACHEABLE) - return ioremap(start, len); - - return ioremap_nocache(start, len); - } - - return NULL; -} -EXPORT_SYMBOL(pci_iomap); - -void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ - iounmap(addr); -} -EXPORT_SYMBOL(pci_iounmap); - -#ifdef CONFIG_HOTPLUG -EXPORT_SYMBOL(pcibios_resource_to_bus); -EXPORT_SYMBOL(pcibios_bus_to_resource); -EXPORT_SYMBOL(PCIBIOS_MIN_IO); -EXPORT_SYMBOL(PCIBIOS_MIN_MEM); -#endif diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c index 8c0b136..54d77cb 100644 --- a/arch/sh/drivers/pci/pci-new.c +++ b/arch/sh/drivers/pci/pci-new.c @@ -1,20 +1,28 @@ /* * New-style PCI core. * - * Copyright (c) 2002 M. R. Brown * Copyright (c) 2004 - 2009 Paul Mundt + * Copyright (c) 2002 M. R. Brown + * + * Modelled after arch/mips/pci/pci.c: + * Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org) * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. */ #include +#include #include #include +#include #include #include #include +unsigned long PCIBIOS_MIN_IO = 0x0000; +unsigned long PCIBIOS_MIN_MEM = 0; + /* * The PCI controller list. */ @@ -27,9 +35,6 @@ static void __devinit pcibios_scanbus(struct pci_channel *hose) static int next_busno; struct pci_bus *bus; - /* Catch botched conversion attempts */ - BUG_ON(hose->init); - bus = pci_scan_bus(next_busno, hose->pci_ops, hose); if (bus) { next_busno = bus->subordinate + 1; @@ -128,7 +133,7 @@ static void pcibios_fixup_device_resources(struct pci_dev *dev, * Called after each bus is probed, but before its children * are examined. */ -void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) +void __devinit pcibios_fixup_bus(struct pci_bus *bus) { struct pci_dev *dev = bus->self; struct list_head *ln; @@ -146,3 +151,214 @@ void __devinit __weak pcibios_fixup_bus(struct pci_bus *bus) pcibios_fixup_device_resources(dev, bus); } } + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + */ +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) +{ + struct pci_dev *dev = data; + struct pci_channel *chan = dev->sysdata; + resource_size_t start = res->start; + + if (res->flags & IORESOURCE_IO) { + if (start < PCIBIOS_MIN_IO + chan->io_resource->start) + start = PCIBIOS_MIN_IO + chan->io_resource->start; + + /* + * Put everything into 0x00-0xff region modulo 0x400. + */ + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } else if (res->flags & IORESOURCE_MEM) { + if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start) + start = PCIBIOS_MIN_MEM + chan->mem_resource->start; + } + + res->start = start; +} + +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void __devinit +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx=0; idx < PCI_NUM_RESOURCES; idx++) { + /* Only set up the requested stuff */ + if (!(mask & (1<resource[idx]; + if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if ((idx == PCI_ROM_RESOURCE) && + (!(r->flags & IORESOURCE_ROM_ENABLE))) + continue; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", + pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check and set + * the latency timer as it may not be properly set. + */ +static unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", + pci_name(dev), lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +char * __devinit pcibios_setup(char *str) +{ + return str; +} + +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + /* + * I/O space can be accessed via normal processor loads and stores on + * this platform but for now we elect not to do this and portable + * drivers should not do this anyway. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + /* + * Ignore write-combine; for now only return uncached mappings. + */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static void __iomem *ioport_map_pci(struct pci_dev *dev, + unsigned long port, unsigned int nr) +{ + struct pci_channel *chan = dev->sysdata; + + if (!chan->io_map_base) + chan->io_map_base = generic_io_base; + + return (void __iomem *)(chan->io_map_base + port); +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (unlikely(!len || !start)) + return NULL; + if (maxlen && len > maxlen) + len = maxlen; + + if (flags & IORESOURCE_IO) + return ioport_map_pci(dev, start, len); + + /* + * Presently the IORESOURCE_MEM case is a bit special, most + * SH7751 style PCI controllers have PCI memory at a fixed + * location in the address space where no remapping is desired. + * With the IORESOURCE_MEM case more care has to be taken + * to inhibit page table mapping for legacy cores, but this is + * punted off to __ioremap(). + * -- PFM. + */ + if (flags & IORESOURCE_MEM) { + if (flags & IORESOURCE_CACHEABLE) + return ioremap(start, len); + + return ioremap_nocache(start, len); + } + + return NULL; +} +EXPORT_SYMBOL(pci_iomap); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); +EXPORT_SYMBOL(PCIBIOS_MIN_IO); +EXPORT_SYMBOL(PCIBIOS_MIN_MEM); +#endif -- cgit v0.10.2 From cf242007a1eb29dcf93d1cb34713ec9b3f4a0e1b Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 20 Apr 2009 21:53:33 +0900 Subject: sh: pci: Rename pci-new.c to pci.c. pci-new.c is now in a state to replace the old pci.c, rename it accordingly. Signed-off-by: Paul Mundt diff --git a/arch/sh/drivers/pci/Kconfig b/arch/sh/drivers/pci/Kconfig index ea903a9..e8db585 100644 --- a/arch/sh/drivers/pci/Kconfig +++ b/arch/sh/drivers/pci/Kconfig @@ -17,7 +17,3 @@ config SH_PCIDMA_NONCOHERENT code will not have to flush the CPU's caches. If you have a PCI host bridge integrated with your SH CPU, refer carefully to the chip specs to see if you can say 'N' here. Otherwise, leave it as 'Y'. - -config PCI_NEW - def_bool y - depends on PCI diff --git a/arch/sh/drivers/pci/Makefile b/arch/sh/drivers/pci/Makefile index eacac7f..d2ffc47 100644 --- a/arch/sh/drivers/pci/Makefile +++ b/arch/sh/drivers/pci/Makefile @@ -1,7 +1,7 @@ # # Makefile for the PCI specific kernel interface routines under Linux. # -obj-$(CONFIG_PCI_NEW) += pci-new.o +obj-y += pci.o obj-$(CONFIG_CPU_SUBTYPE_SH7751) += pci-sh7751.o ops-sh4.o obj-$(CONFIG_CPU_SUBTYPE_SH7751R) += pci-sh7751.o ops-sh4.o diff --git a/arch/sh/drivers/pci/pci-new.c b/arch/sh/drivers/pci/pci-new.c deleted file mode 100644 index 54d77cb..0000000 --- a/arch/sh/drivers/pci/pci-new.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * New-style PCI core. - * - * Copyright (c) 2004 - 2009 Paul Mundt - * Copyright (c) 2002 M. R. Brown - * - * Modelled after arch/mips/pci/pci.c: - * Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org) - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ -#include -#include -#include -#include -#include -#include -#include -#include - -unsigned long PCIBIOS_MIN_IO = 0x0000; -unsigned long PCIBIOS_MIN_MEM = 0; - -/* - * The PCI controller list. - */ -static struct pci_channel *hose_head, **hose_tail = &hose_head; - -static int pci_initialized; - -static void __devinit pcibios_scanbus(struct pci_channel *hose) -{ - static int next_busno; - struct pci_bus *bus; - - bus = pci_scan_bus(next_busno, hose->pci_ops, hose); - if (bus) { - next_busno = bus->subordinate + 1; - /* Don't allow 8-bit bus number overflow inside the hose - - reserve some space for bridges. */ - if (next_busno > 224) - next_busno = 0; - - pci_bus_size_bridges(bus); - pci_bus_assign_resources(bus); - pci_enable_bridges(bus); - } -} - -static DEFINE_MUTEX(pci_scan_mutex); - -void __devinit register_pci_controller(struct pci_channel *hose) -{ - if (request_resource(&iomem_resource, hose->mem_resource) < 0) - goto out; - if (request_resource(&ioport_resource, hose->io_resource) < 0) { - release_resource(hose->mem_resource); - goto out; - } - - *hose_tail = hose; - hose_tail = &hose->next; - - /* - * Do not panic here but later - this might hapen before console init. - */ - if (!hose->io_map_base) { - printk(KERN_WARNING - "registering PCI controller with io_map_base unset\n"); - } - - /* - * Scan the bus if it is register after the PCI subsystem - * initialization. - */ - if (pci_initialized) { - mutex_lock(&pci_scan_mutex); - pcibios_scanbus(hose); - mutex_unlock(&pci_scan_mutex); - } - - return; - -out: - printk(KERN_WARNING - "Skipping PCI bus scan due to resource conflict\n"); -} - -static int __init pcibios_init(void) -{ - struct pci_channel *hose; - - /* Scan all of the recorded PCI controllers. */ - for (hose = hose_head; hose; hose = hose->next) - pcibios_scanbus(hose); - - pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); - - dma_debug_add_bus(&pci_bus_type); - - pci_initialized = 1; - - return 0; -} -subsys_initcall(pcibios_init); - -static void pcibios_fixup_device_resources(struct pci_dev *dev, - struct pci_bus *bus) -{ - /* Update device resources. */ - struct pci_channel *hose = bus->sysdata; - unsigned long offset = 0; - int i; - - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - if (!dev->resource[i].start) - continue; - if (dev->resource[i].flags & IORESOURCE_PCI_FIXED) - continue; - if (dev->resource[i].flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (dev->resource[i].flags & IORESOURCE_MEM) - offset = hose->mem_offset; - - dev->resource[i].start += offset; - dev->resource[i].end += offset; - } -} - -/* - * Called after each bus is probed, but before its children - * are examined. - */ -void __devinit pcibios_fixup_bus(struct pci_bus *bus) -{ - struct pci_dev *dev = bus->self; - struct list_head *ln; - struct pci_channel *chan = bus->sysdata; - - if (!dev) { - bus->resource[0] = chan->io_resource; - bus->resource[1] = chan->mem_resource; - } - - for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) { - dev = pci_dev_b(ln); - - if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) - pcibios_fixup_device_resources(dev, bus); - } -} - -/* - * We need to avoid collisions with `mirrored' VGA ports - * and other strange ISA hardware, so we always want the - * addresses to be allocated in the 0x000-0x0ff region - * modulo 0x400. - */ -void pcibios_align_resource(void *data, struct resource *res, - resource_size_t size, resource_size_t align) -{ - struct pci_dev *dev = data; - struct pci_channel *chan = dev->sysdata; - resource_size_t start = res->start; - - if (res->flags & IORESOURCE_IO) { - if (start < PCIBIOS_MIN_IO + chan->io_resource->start) - start = PCIBIOS_MIN_IO + chan->io_resource->start; - - /* - * Put everything into 0x00-0xff region modulo 0x400. - */ - if (start & 0x300) { - start = (start + 0x3ff) & ~0x3ff; - res->start = start; - } - } else if (res->flags & IORESOURCE_MEM) { - if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start) - start = PCIBIOS_MIN_MEM + chan->mem_resource->start; - } - - res->start = start; -} - -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, - struct resource *res) -{ - struct pci_channel *hose = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (res->flags & IORESOURCE_MEM) - offset = hose->mem_offset; - - region->start = res->start - offset; - region->end = res->end - offset; -} - -void __devinit -pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, - struct pci_bus_region *region) -{ - struct pci_channel *hose = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = hose->io_offset; - else if (res->flags & IORESOURCE_MEM) - offset = hose->mem_offset; - - res->start = region->start + offset; - res->end = region->end + offset; -} - -int pcibios_enable_device(struct pci_dev *dev, int mask) -{ - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx=0; idx < PCI_NUM_RESOURCES; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1<resource[idx]; - if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) - continue; - if ((idx == PCI_ROM_RESOURCE) && - (!(r->flags & IORESOURCE_ROM_ENABLE))) - continue; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available " - "because of resource collisions\n", - pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - if (cmd != old_cmd) { - printk("PCI: Enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; -} - -/* - * If we set up a device for bus mastering, we need to check and set - * the latency timer as it may not be properly set. - */ -static unsigned int pcibios_max_latency = 255; - -void pcibios_set_master(struct pci_dev *dev) -{ - u8 lat; - pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); - if (lat < 16) - lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; - else if (lat > pcibios_max_latency) - lat = pcibios_max_latency; - else - return; - printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); - pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); -} - -void __init pcibios_update_irq(struct pci_dev *dev, int irq) -{ - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - -char * __devinit pcibios_setup(char *str) -{ - return str; -} - -int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, - enum pci_mmap_state mmap_state, int write_combine) -{ - /* - * I/O space can be accessed via normal processor loads and stores on - * this platform but for now we elect not to do this and portable - * drivers should not do this anyway. - */ - if (mmap_state == pci_mmap_io) - return -EINVAL; - - /* - * Ignore write-combine; for now only return uncached mappings. - */ - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - - return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); -} - -static void __iomem *ioport_map_pci(struct pci_dev *dev, - unsigned long port, unsigned int nr) -{ - struct pci_channel *chan = dev->sysdata; - - if (!chan->io_map_base) - chan->io_map_base = generic_io_base; - - return (void __iomem *)(chan->io_map_base + port); -} - -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(dev, bar); - resource_size_t len = pci_resource_len(dev, bar); - unsigned long flags = pci_resource_flags(dev, bar); - - if (unlikely(!len || !start)) - return NULL; - if (maxlen && len > maxlen) - len = maxlen; - - if (flags & IORESOURCE_IO) - return ioport_map_pci(dev, start, len); - - /* - * Presently the IORESOURCE_MEM case is a bit special, most - * SH7751 style PCI controllers have PCI memory at a fixed - * location in the address space where no remapping is desired. - * With the IORESOURCE_MEM case more care has to be taken - * to inhibit page table mapping for legacy cores, but this is - * punted off to __ioremap(). - * -- PFM. - */ - if (flags & IORESOURCE_MEM) { - if (flags & IORESOURCE_CACHEABLE) - return ioremap(start, len); - - return ioremap_nocache(start, len); - } - - return NULL; -} -EXPORT_SYMBOL(pci_iomap); - -void pci_iounmap(struct pci_dev *dev, void __iomem *addr) -{ - iounmap(addr); -} -EXPORT_SYMBOL(pci_iounmap); - -#ifdef CONFIG_HOTPLUG -EXPORT_SYMBOL(pcibios_resource_to_bus); -EXPORT_SYMBOL(pcibios_bus_to_resource); -EXPORT_SYMBOL(PCIBIOS_MIN_IO); -EXPORT_SYMBOL(PCIBIOS_MIN_MEM); -#endif diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c new file mode 100644 index 0000000..54d77cb --- /dev/null +++ b/arch/sh/drivers/pci/pci.c @@ -0,0 +1,364 @@ +/* + * New-style PCI core. + * + * Copyright (c) 2004 - 2009 Paul Mundt + * Copyright (c) 2002 M. R. Brown + * + * Modelled after arch/mips/pci/pci.c: + * Copyright (C) 2003, 04 Ralf Baechle (ralf@linux-mips.org) + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long PCIBIOS_MIN_IO = 0x0000; +unsigned long PCIBIOS_MIN_MEM = 0; + +/* + * The PCI controller list. + */ +static struct pci_channel *hose_head, **hose_tail = &hose_head; + +static int pci_initialized; + +static void __devinit pcibios_scanbus(struct pci_channel *hose) +{ + static int next_busno; + struct pci_bus *bus; + + bus = pci_scan_bus(next_busno, hose->pci_ops, hose); + if (bus) { + next_busno = bus->subordinate + 1; + /* Don't allow 8-bit bus number overflow inside the hose - + reserve some space for bridges. */ + if (next_busno > 224) + next_busno = 0; + + pci_bus_size_bridges(bus); + pci_bus_assign_resources(bus); + pci_enable_bridges(bus); + } +} + +static DEFINE_MUTEX(pci_scan_mutex); + +void __devinit register_pci_controller(struct pci_channel *hose) +{ + if (request_resource(&iomem_resource, hose->mem_resource) < 0) + goto out; + if (request_resource(&ioport_resource, hose->io_resource) < 0) { + release_resource(hose->mem_resource); + goto out; + } + + *hose_tail = hose; + hose_tail = &hose->next; + + /* + * Do not panic here but later - this might hapen before console init. + */ + if (!hose->io_map_base) { + printk(KERN_WARNING + "registering PCI controller with io_map_base unset\n"); + } + + /* + * Scan the bus if it is register after the PCI subsystem + * initialization. + */ + if (pci_initialized) { + mutex_lock(&pci_scan_mutex); + pcibios_scanbus(hose); + mutex_unlock(&pci_scan_mutex); + } + + return; + +out: + printk(KERN_WARNING + "Skipping PCI bus scan due to resource conflict\n"); +} + +static int __init pcibios_init(void) +{ + struct pci_channel *hose; + + /* Scan all of the recorded PCI controllers. */ + for (hose = hose_head; hose; hose = hose->next) + pcibios_scanbus(hose); + + pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); + + dma_debug_add_bus(&pci_bus_type); + + pci_initialized = 1; + + return 0; +} +subsys_initcall(pcibios_init); + +static void pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus) +{ + /* Update device resources. */ + struct pci_channel *hose = bus->sysdata; + unsigned long offset = 0; + int i; + + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!dev->resource[i].start) + continue; + if (dev->resource[i].flags & IORESOURCE_PCI_FIXED) + continue; + if (dev->resource[i].flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (dev->resource[i].flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + dev->resource[i].start += offset; + dev->resource[i].end += offset; + } +} + +/* + * Called after each bus is probed, but before its children + * are examined. + */ +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + struct pci_dev *dev = bus->self; + struct list_head *ln; + struct pci_channel *chan = bus->sysdata; + + if (!dev) { + bus->resource[0] = chan->io_resource; + bus->resource[1] = chan->mem_resource; + } + + for (ln = bus->devices.next; ln != &bus->devices; ln = ln->next) { + dev = pci_dev_b(ln); + + if ((dev->class >> 8) != PCI_CLASS_BRIDGE_PCI) + pcibios_fixup_device_resources(dev, bus); + } +} + +/* + * We need to avoid collisions with `mirrored' VGA ports + * and other strange ISA hardware, so we always want the + * addresses to be allocated in the 0x000-0x0ff region + * modulo 0x400. + */ +void pcibios_align_resource(void *data, struct resource *res, + resource_size_t size, resource_size_t align) +{ + struct pci_dev *dev = data; + struct pci_channel *chan = dev->sysdata; + resource_size_t start = res->start; + + if (res->flags & IORESOURCE_IO) { + if (start < PCIBIOS_MIN_IO + chan->io_resource->start) + start = PCIBIOS_MIN_IO + chan->io_resource->start; + + /* + * Put everything into 0x00-0xff region modulo 0x400. + */ + if (start & 0x300) { + start = (start + 0x3ff) & ~0x3ff; + res->start = start; + } + } else if (res->flags & IORESOURCE_MEM) { + if (start < PCIBIOS_MIN_MEM + chan->mem_resource->start) + start = PCIBIOS_MIN_MEM + chan->mem_resource->start; + } + + res->start = start; +} + +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + region->start = res->start - offset; + region->end = res->end - offset; +} + +void __devinit +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region) +{ + struct pci_channel *hose = dev->sysdata; + unsigned long offset = 0; + + if (res->flags & IORESOURCE_IO) + offset = hose->io_offset; + else if (res->flags & IORESOURCE_MEM) + offset = hose->mem_offset; + + res->start = region->start + offset; + res->end = region->end + offset; +} + +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + int idx; + struct resource *r; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + for (idx=0; idx < PCI_NUM_RESOURCES; idx++) { + /* Only set up the requested stuff */ + if (!(mask & (1<resource[idx]; + if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if ((idx == PCI_ROM_RESOURCE) && + (!(r->flags & IORESOURCE_ROM_ENABLE))) + continue; + if (!r->start && r->end) { + printk(KERN_ERR "PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + if (cmd != old_cmd) { + printk("PCI: Enabling device %s (%04x -> %04x)\n", + pci_name(dev), old_cmd, cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd); + } + return 0; +} + +/* + * If we set up a device for bus mastering, we need to check and set + * the latency timer as it may not be properly set. + */ +static unsigned int pcibios_max_latency = 255; + +void pcibios_set_master(struct pci_dev *dev) +{ + u8 lat; + pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat); + if (lat < 16) + lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency; + else if (lat > pcibios_max_latency) + lat = pcibios_max_latency; + else + return; + printk(KERN_INFO "PCI: Setting latency timer of device %s to %d\n", + pci_name(dev), lat); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); +} + +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +char * __devinit pcibios_setup(char *str) +{ + return str; +} + +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + /* + * I/O space can be accessed via normal processor loads and stores on + * this platform but for now we elect not to do this and portable + * drivers should not do this anyway. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + /* + * Ignore write-combine; for now only return uncached mappings. + */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static void __iomem *ioport_map_pci(struct pci_dev *dev, + unsigned long port, unsigned int nr) +{ + struct pci_channel *chan = dev->sysdata; + + if (!chan->io_map_base) + chan->io_map_base = generic_io_base; + + return (void __iomem *)(chan->io_map_base + port); +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (unlikely(!len || !start)) + return NULL; + if (maxlen && len > maxlen) + len = maxlen; + + if (flags & IORESOURCE_IO) + return ioport_map_pci(dev, start, len); + + /* + * Presently the IORESOURCE_MEM case is a bit special, most + * SH7751 style PCI controllers have PCI memory at a fixed + * location in the address space where no remapping is desired. + * With the IORESOURCE_MEM case more care has to be taken + * to inhibit page table mapping for legacy cores, but this is + * punted off to __ioremap(). + * -- PFM. + */ + if (flags & IORESOURCE_MEM) { + if (flags & IORESOURCE_CACHEABLE) + return ioremap(start, len); + + return ioremap_nocache(start, len); + } + + return NULL; +} +EXPORT_SYMBOL(pci_iomap); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); + +#ifdef CONFIG_HOTPLUG +EXPORT_SYMBOL(pcibios_resource_to_bus); +EXPORT_SYMBOL(pcibios_bus_to_resource); +EXPORT_SYMBOL(PCIBIOS_MIN_IO); +EXPORT_SYMBOL(PCIBIOS_MIN_MEM); +#endif -- cgit v0.10.2 From 9ae5b8790037d05d32746f521af146c32089bfec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 10:27:58 -0400 Subject: tracing: change branch profiling to a choice selection This patch makes the branch profiling into a choice selection: None - no branch profiling likely/unlikely - only profile likely/unlikely branches all - profile all branches The all profiler will also enable the likely/unlikely branches. This does not change the way the profiler works or the dependencies between the profilers. What this patch does, is keep the branch profiling from being selected by an allyesconfig make. The branch profiler is very intrusive and it is known to break various architecture builds when selected as an allyesconfig. [ Impact: prevent branch profiler from being selected in allyesconfig ] Reported-by: Heiko Carstens Reported-by: Al Viro Reported-by: Stephen Rothwell Reported-by: Andrew Morton Signed-off-by: Steven Rostedt diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 57981d3..3ee28db 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -212,8 +212,36 @@ config BOOT_TRACER to enable this on bootup. config TRACE_BRANCH_PROFILING - bool "Trace likely/unlikely profiler" + bool select TRACING + +choice + prompt "Branch Profiling" + default BRANCH_PROFILE_NONE + help + The branch profiling is a software profiler. It will add hooks + into the C conditionals to test which path a branch takes. + + The likely/unlikely profiler only looks at the conditions that + are annotated with a likely or unlikely macro. + + The "all branch" profiler will profile every if statement in the + kernel. This profiler will also enable the likely/unlikely + profiler as well. + + Either of the above profilers add a bit of overhead to the system. + If unsure choose "No branch profiling". + +config BRANCH_PROFILE_NONE + bool "No branch profiling" + help + No branch profiling. Branch profiling adds a bit of overhead. + Only enable it if you want to analyse the branching behavior. + Otherwise keep it disabled. + +config PROFILE_ANNOTATED_BRANCHES + bool "Trace likely/unlikely profiler" + select TRACE_BRANCH_PROFILING help This tracer profiles all the the likely and unlikely macros in the kernel. It will display the results in: @@ -223,11 +251,9 @@ config TRACE_BRANCH_PROFILING Note: this will add a significant overhead, only turn this on if you need to profile the system's use of these macros. - Say N if unsure. - config PROFILE_ALL_BRANCHES bool "Profile all if conditionals" - depends on TRACE_BRANCH_PROFILING + select TRACE_BRANCH_PROFILING help This tracer profiles all branch conditions. Every if () taken in the kernel is recorded whether it hit or miss. @@ -235,11 +261,12 @@ config PROFILE_ALL_BRANCHES /debugfs/tracing/profile_branch + This option also enables the likely/unlikely profiler. + This configuration, when enabled, will impose a great overhead on the system. This should only be enabled when the system is to be analyzed - - Say N if unsure. +endchoice config TRACING_BRANCHES bool -- cgit v0.10.2 From 4ed9f0716e46bb9646f26e73f4a1b5b24db7947a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 10:47:36 -0400 Subject: tracing: create menuconfig for tracing infrastructure During testing we often use randconfig to test various kernels. The current configuration set up does not give an easy way to disable all tracing with a single config. The case where randconfig would test all tracing disabled is very unlikely. This patch adds a config option to enable or disable all tracing. It is hooked into the tracing menu just like other submenus are done. [ Impact: allow randconfig to easily produce all traces disabled ] Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3ee28db..3fa36d2 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -77,7 +77,12 @@ config TRACING_SUPPORT if TRACING_SUPPORT -menu "Tracers" +menuconfig FTRACE + bool "Tracers" + help + Enable the kernel tracing infrastructure. + +if FTRACE config FUNCTION_TRACER bool "Kernel Function Tracer" @@ -462,7 +467,7 @@ config MMIOTRACE_TEST Say N, unless you absolutely know what you are doing. -endmenu +endif # FTRACE endif # TRACING_SUPPORT -- cgit v0.10.2 From d24e473e5b2ca86d1288b9416227ccc603313d0f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 13:32:07 +0200 Subject: perf_counter: copy in Git's top Makefile We'd like to have a similar user-space structure as Git has, for the perfcounter tools - so copy in Git's toplevel makefile as-is. We'll strip it down in subsequent commits to make it fit the perfcounters code. The Git version used: 66996ec: Sync with 1.6.2.4 Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 1dd37ee..6e0838b 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -1,18 +1,1731 @@ -BINS = kerneltop perfstat perf-record perf-report +# The default target of this Makefile is... +all:: -all: $(BINS) +# Define V=1 to have a more verbose compile. +# +# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() +# or vsnprintf() return -1 instead of number of characters which would +# have been written to the final string if enough space had been available. +# +# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds +# when attempting to read from an fopen'ed directory. +# +# Define NO_OPENSSL environment variable if you do not have OpenSSL. +# This also implies MOZILLA_SHA1. +# +# Define NO_CURL if you do not have libcurl installed. git-http-pull and +# git-http-push are not built, and you cannot use http:// and https:// +# transports. +# +# Define CURLDIR=/foo/bar if your curl header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# +# Define NO_EXPAT if you do not have expat installed. git-http-push is +# not built, and you cannot push using http:// and https:// transports. +# +# Define EXPATDIR=/foo/bar if your expat header and library files are in +# /foo/bar/include and /foo/bar/lib directories. +# +# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. +# +# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks +# d_type in struct dirent (latest Cygwin -- will be fixed soonish). +# +# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.) +# do not support the 'size specifiers' introduced by C99, namely ll, hh, +# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t). +# some C compilers supported these specifiers prior to C99 as an extension. +# +# Define NO_STRCASESTR if you don't have strcasestr. +# +# Define NO_MEMMEM if you don't have memmem. +# +# Define NO_STRLCPY if you don't have strlcpy. +# +# Define NO_STRTOUMAX if you don't have strtoumax in the C library. +# If your compiler also does not support long long or does not have +# strtoull, define NO_STRTOULL. +# +# Define NO_SETENV if you don't have setenv in the C library. +# +# Define NO_UNSETENV if you don't have unsetenv in the C library. +# +# Define NO_MKDTEMP if you don't have mkdtemp in the C library. +# +# Define NO_SYS_SELECT_H if you don't have sys/select.h. +# +# Define NO_SYMLINK_HEAD if you never want .git/HEAD to be a symbolic link. +# Enable it on Windows. By default, symrefs are still used. +# +# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability +# tests. These tests take up a significant amount of the total test time +# but are not needed unless you plan to talk to SVN repos. +# +# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink +# installed in /sw, but don't want GIT to link against any libraries +# installed there. If defined you may specify your own (or Fink's) +# include directories and library directories by defining CFLAGS +# and LDFLAGS appropriately. +# +# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X, +# have DarwinPorts installed in /opt/local, but don't want GIT to +# link against any libraries installed there. If defined you may +# specify your own (or DarwinPort's) include directories and +# library directories by defining CFLAGS and LDFLAGS appropriately. +# +# Define PPC_SHA1 environment variable when running make to make use of +# a bundled SHA1 routine optimized for PowerPC. +# +# Define ARM_SHA1 environment variable when running make to make use of +# a bundled SHA1 routine optimized for ARM. +# +# Define MOZILLA_SHA1 environment variable when running make to make use of +# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast +# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default +# choice) has very fast version optimized for i586. +# +# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin). +# +# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). +# +# Define NEEDS_SOCKET if linking with libc is not enough (SunOS, +# Patrick Mauritz). +# +# Define NO_MMAP if you want to avoid mmap. +# +# Define NO_PTHREADS if you do not have or do not want to use Pthreads. +# +# Define NO_PREAD if you have a problem with pread() system call (e.g. +# cygwin.dll before v1.5.22). +# +# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is +# generally faster on your platform than accessing the working directory. +# +# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support +# the executable mode bit, but doesn't really do so. +# +# Define NO_IPV6 if you lack IPv6 support and getaddrinfo(). +# +# Define NO_SOCKADDR_STORAGE if your platform does not have struct +# sockaddr_storage. +# +# Define NO_ICONV if your libc does not properly support iconv. +# +# Define OLD_ICONV if your library has an old iconv(), where the second +# (input buffer pointer) parameter is declared with type (const char **). +# +# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. +# +# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib" +# that tells runtime paths to dynamic libraries; +# "-Wl,-rpath=/path/lib" is used instead. +# +# Define USE_NSEC below if you want git to care about sub-second file mtimes +# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and +# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely +# randomly break unless your underlying filesystem supports those sub-second +# times (my ext3 doesn't). +# +# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of +# "st_ctim" +# +# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" +# available. This automatically turns USE_NSEC off. +# +# Define USE_STDEV below if you want git to care about the underlying device +# change being considered an inode change from the update-index perspective. +# +# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks +# field that counts the on-disk footprint in 512-byte blocks. +# +# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 +# +# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. +# +# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's +# MakeMaker (e.g. using ActiveState under Cygwin). +# +# Define NO_PERL if you do not want Perl scripts or libraries at all. +# +# Define NO_TCLTK if you do not want Tcl/Tk GUI. +# +# The TCL_PATH variable governs the location of the Tcl interpreter +# used to optimize git-gui for your system. Only used if NO_TCLTK +# is not set. Defaults to the bare 'tclsh'. +# +# The TCLTK_PATH variable governs the location of the Tcl/Tk interpreter. +# If not set it defaults to the bare 'wish'. If it is set to the empty +# string then NO_TCLTK will be forced (this is used by configure script). +# +# Define THREADED_DELTA_SEARCH if you have pthreads and wish to exploit +# parallel delta searching when packing objects. +# +# Define INTERNAL_QSORT to use Git's implementation of qsort(), which +# is a simplified version of the merge sort used in glibc. This is +# recommended if Git triggers O(n^2) behavior in your platform's qsort(). +# +# Define NO_EXTERNAL_GREP if you don't want "git grep" to ever call +# your external grep (e.g., if your system lacks grep, if its grep is +# broken, or spawning external process is slower than built-in grep git has). -kerneltop: kerneltop.c ../../include/linux/perf_counter.h - cc -O6 -Wall -lrt -o $@ $< +GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE + @$(SHELL_PATH) ./GIT-VERSION-GEN +-include GIT-VERSION-FILE -perf-record: perf-record.c ../../include/linux/perf_counter.h - cc -O6 -Wall -lrt -o $@ $< +uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') +uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') +uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not') +uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') +uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') +uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') -perf-report: perf-report.cc ../../include/linux/perf_counter.h - g++ -O6 -Wall -lrt -o $@ $< +# CFLAGS and LDFLAGS are for the users to override from the command line. -perfstat: kerneltop - ln -sf kerneltop perfstat +CFLAGS = -g -O2 -Wall +LDFLAGS = +ALL_CFLAGS = $(CFLAGS) +ALL_LDFLAGS = $(LDFLAGS) +STRIP ?= strip + +# Among the variables below, these: +# gitexecdir +# template_dir +# mandir +# infodir +# htmldir +# ETC_GITCONFIG (but not sysconfdir) +# can be specified as a relative path some/where/else; +# this is interpreted as relative to $(prefix) and "git" at +# runtime figures out where they are based on the path to the executable. +# This can help installing the suite in a relocatable way. + +prefix = $(HOME) +bindir_relative = bin +bindir = $(prefix)/$(bindir_relative) +mandir = share/man +infodir = share/info +gitexecdir = libexec/git-core +sharedir = $(prefix)/share +template_dir = share/git-core/templates +htmldir = share/doc/git-doc +ifeq ($(prefix),/usr) +sysconfdir = /etc +ETC_GITCONFIG = $(sysconfdir)/gitconfig +else +sysconfdir = $(prefix)/etc +ETC_GITCONFIG = etc/gitconfig +endif +lib = lib +# DESTDIR= + +# default configuration for gitweb +GITWEB_CONFIG = gitweb_config.perl +GITWEB_CONFIG_SYSTEM = /etc/gitweb.conf +GITWEB_HOME_LINK_STR = projects +GITWEB_SITENAME = +GITWEB_PROJECTROOT = /pub/git +GITWEB_PROJECT_MAXDEPTH = 2007 +GITWEB_EXPORT_OK = +GITWEB_STRICT_EXPORT = +GITWEB_BASE_URL = +GITWEB_LIST = +GITWEB_HOMETEXT = indextext.html +GITWEB_CSS = gitweb.css +GITWEB_LOGO = git-logo.png +GITWEB_FAVICON = git-favicon.png +GITWEB_SITE_HEADER = +GITWEB_SITE_FOOTER = + +export prefix bindir sharedir sysconfdir + +CC = gcc +AR = ar +RM = rm -f +TAR = tar +FIND = find +INSTALL = install +RPMBUILD = rpmbuild +TCL_PATH = tclsh +TCLTK_PATH = wish +PTHREAD_LIBS = -lpthread + +export TCL_PATH TCLTK_PATH + +# sparse is architecture-neutral, which means that we need to tell it +# explicitly what architecture to check for. Fix this up for yours.. +SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ + + + +### --- END CONFIGURATION SECTION --- + +# Those must not be GNU-specific; they are shared with perl/ which may +# be built by a different compiler. (Note that this is an artifact now +# but it still might be nice to keep that distinction.) +BASIC_CFLAGS = +BASIC_LDFLAGS = + +# Guard against environment variables +BUILTIN_OBJS = +BUILT_INS = +COMPAT_CFLAGS = +COMPAT_OBJS = +LIB_H = +LIB_OBJS = +PROGRAMS = +SCRIPT_PERL = +SCRIPT_SH = +TEST_PROGRAMS = + +SCRIPT_SH += git-am.sh +SCRIPT_SH += git-bisect.sh +SCRIPT_SH += git-difftool--helper.sh +SCRIPT_SH += git-filter-branch.sh +SCRIPT_SH += git-lost-found.sh +SCRIPT_SH += git-merge-octopus.sh +SCRIPT_SH += git-merge-one-file.sh +SCRIPT_SH += git-merge-resolve.sh +SCRIPT_SH += git-mergetool.sh +SCRIPT_SH += git-mergetool--lib.sh +SCRIPT_SH += git-parse-remote.sh +SCRIPT_SH += git-pull.sh +SCRIPT_SH += git-quiltimport.sh +SCRIPT_SH += git-rebase--interactive.sh +SCRIPT_SH += git-rebase.sh +SCRIPT_SH += git-repack.sh +SCRIPT_SH += git-request-pull.sh +SCRIPT_SH += git-sh-setup.sh +SCRIPT_SH += git-stash.sh +SCRIPT_SH += git-submodule.sh +SCRIPT_SH += git-web--browse.sh + +SCRIPT_PERL += git-add--interactive.perl +SCRIPT_PERL += git-difftool.perl +SCRIPT_PERL += git-archimport.perl +SCRIPT_PERL += git-cvsexportcommit.perl +SCRIPT_PERL += git-cvsimport.perl +SCRIPT_PERL += git-cvsserver.perl +SCRIPT_PERL += git-relink.perl +SCRIPT_PERL += git-send-email.perl +SCRIPT_PERL += git-svn.perl + +SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ + $(patsubst %.perl,%,$(SCRIPT_PERL)) \ + git-instaweb + +# Empty... +EXTRA_PROGRAMS = + +# ... and all the rest that could be moved out of bindir to gitexecdir +PROGRAMS += $(EXTRA_PROGRAMS) +PROGRAMS += git-fast-import$X +PROGRAMS += git-hash-object$X +PROGRAMS += git-index-pack$X +PROGRAMS += git-merge-index$X +PROGRAMS += git-merge-tree$X +PROGRAMS += git-mktag$X +PROGRAMS += git-mktree$X +PROGRAMS += git-pack-redundant$X +PROGRAMS += git-patch-id$X +PROGRAMS += git-shell$X +PROGRAMS += git-show-index$X +PROGRAMS += git-unpack-file$X +PROGRAMS += git-update-server-info$X +PROGRAMS += git-upload-pack$X +PROGRAMS += git-var$X + +# List built-in command $C whose implementation cmd_$C() is not in +# builtin-$C.o but is linked in as part of some other command. +BUILT_INS += $(patsubst builtin-%.o,git-%$X,$(BUILTIN_OBJS)) + +BUILT_INS += git-cherry$X +BUILT_INS += git-cherry-pick$X +BUILT_INS += git-format-patch$X +BUILT_INS += git-fsck-objects$X +BUILT_INS += git-get-tar-commit-id$X +BUILT_INS += git-init$X +BUILT_INS += git-merge-subtree$X +BUILT_INS += git-peek-remote$X +BUILT_INS += git-repo-config$X +BUILT_INS += git-show$X +BUILT_INS += git-stage$X +BUILT_INS += git-status$X +BUILT_INS += git-whatchanged$X + +# what 'all' will build and 'install' will install, in gitexecdir +ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) + +# what 'all' will build but not install in gitexecdir +OTHER_PROGRAMS = git$X +ifndef NO_PERL +OTHER_PROGRAMS += gitweb/gitweb.cgi +endif + +# Set paths to tools early so that they can be used for version tests. +ifndef SHELL_PATH + SHELL_PATH = /bin/sh +endif +ifndef PERL_PATH + PERL_PATH = /usr/bin/perl +endif + +export PERL_PATH + +LIB_FILE=libgit.a +XDIFF_LIB=xdiff/lib.a + +LIB_H += archive.h +LIB_H += attr.h +LIB_H += blob.h +LIB_H += builtin.h +LIB_H += cache.h +LIB_H += cache-tree.h +LIB_H += commit.h +LIB_H += compat/cygwin.h +LIB_H += compat/mingw.h +LIB_H += csum-file.h +LIB_H += decorate.h +LIB_H += delta.h +LIB_H += diffcore.h +LIB_H += diff.h +LIB_H += dir.h +LIB_H += fsck.h +LIB_H += git-compat-util.h +LIB_H += graph.h +LIB_H += grep.h +LIB_H += hash.h +LIB_H += help.h +LIB_H += levenshtein.h +LIB_H += list-objects.h +LIB_H += ll-merge.h +LIB_H += log-tree.h +LIB_H += mailmap.h +LIB_H += merge-recursive.h +LIB_H += object.h +LIB_H += pack.h +LIB_H += pack-refs.h +LIB_H += pack-revindex.h +LIB_H += parse-options.h +LIB_H += patch-ids.h +LIB_H += pkt-line.h +LIB_H += progress.h +LIB_H += quote.h +LIB_H += reflog-walk.h +LIB_H += refs.h +LIB_H += remote.h +LIB_H += rerere.h +LIB_H += revision.h +LIB_H += run-command.h +LIB_H += sha1-lookup.h +LIB_H += sideband.h +LIB_H += sigchain.h +LIB_H += strbuf.h +LIB_H += string-list.h +LIB_H += tag.h +LIB_H += transport.h +LIB_H += tree.h +LIB_H += tree-walk.h +LIB_H += unpack-trees.h +LIB_H += userdiff.h +LIB_H += utf8.h +LIB_H += wt-status.h + +LIB_OBJS += abspath.o +LIB_OBJS += alias.o +LIB_OBJS += alloc.o +LIB_OBJS += archive.o +LIB_OBJS += archive-tar.o +LIB_OBJS += archive-zip.o +LIB_OBJS += attr.o +LIB_OBJS += base85.o +LIB_OBJS += bisect.o +LIB_OBJS += blob.o +LIB_OBJS += branch.o +LIB_OBJS += bundle.o +LIB_OBJS += cache-tree.o +LIB_OBJS += color.o +LIB_OBJS += combine-diff.o +LIB_OBJS += commit.o +LIB_OBJS += config.o +LIB_OBJS += connect.o +LIB_OBJS += convert.o +LIB_OBJS += copy.o +LIB_OBJS += csum-file.o +LIB_OBJS += ctype.o +LIB_OBJS += date.o +LIB_OBJS += decorate.o +LIB_OBJS += diffcore-break.o +LIB_OBJS += diffcore-delta.o +LIB_OBJS += diffcore-order.o +LIB_OBJS += diffcore-pickaxe.o +LIB_OBJS += diffcore-rename.o +LIB_OBJS += diff-delta.o +LIB_OBJS += diff-lib.o +LIB_OBJS += diff-no-index.o +LIB_OBJS += diff.o +LIB_OBJS += dir.o +LIB_OBJS += editor.o +LIB_OBJS += entry.o +LIB_OBJS += environment.o +LIB_OBJS += exec_cmd.o +LIB_OBJS += fsck.o +LIB_OBJS += graph.o +LIB_OBJS += grep.o +LIB_OBJS += hash.o +LIB_OBJS += help.o +LIB_OBJS += ident.o +LIB_OBJS += levenshtein.o +LIB_OBJS += list-objects.o +LIB_OBJS += ll-merge.o +LIB_OBJS += lockfile.o +LIB_OBJS += log-tree.o +LIB_OBJS += mailmap.o +LIB_OBJS += match-trees.o +LIB_OBJS += merge-file.o +LIB_OBJS += merge-recursive.o +LIB_OBJS += name-hash.o +LIB_OBJS += object.o +LIB_OBJS += pack-check.o +LIB_OBJS += pack-refs.o +LIB_OBJS += pack-revindex.o +LIB_OBJS += pack-write.o +LIB_OBJS += pager.o +LIB_OBJS += parse-options.o +LIB_OBJS += patch-delta.o +LIB_OBJS += patch-ids.o +LIB_OBJS += path.o +LIB_OBJS += pkt-line.o +LIB_OBJS += preload-index.o +LIB_OBJS += pretty.o +LIB_OBJS += progress.o +LIB_OBJS += quote.o +LIB_OBJS += reachable.o +LIB_OBJS += read-cache.o +LIB_OBJS += reflog-walk.o +LIB_OBJS += refs.o +LIB_OBJS += remote.o +LIB_OBJS += rerere.o +LIB_OBJS += revision.o +LIB_OBJS += run-command.o +LIB_OBJS += server-info.o +LIB_OBJS += setup.o +LIB_OBJS += sha1-lookup.o +LIB_OBJS += sha1_file.o +LIB_OBJS += sha1_name.o +LIB_OBJS += shallow.o +LIB_OBJS += sideband.o +LIB_OBJS += sigchain.o +LIB_OBJS += strbuf.o +LIB_OBJS += string-list.o +LIB_OBJS += symlinks.o +LIB_OBJS += tag.o +LIB_OBJS += trace.o +LIB_OBJS += transport.o +LIB_OBJS += tree-diff.o +LIB_OBJS += tree.o +LIB_OBJS += tree-walk.o +LIB_OBJS += unpack-trees.o +LIB_OBJS += usage.o +LIB_OBJS += userdiff.o +LIB_OBJS += utf8.o +LIB_OBJS += walker.o +LIB_OBJS += wrapper.o +LIB_OBJS += write_or_die.o +LIB_OBJS += ws.o +LIB_OBJS += wt-status.o +LIB_OBJS += xdiff-interface.o + +BUILTIN_OBJS += builtin-add.o +BUILTIN_OBJS += builtin-annotate.o +BUILTIN_OBJS += builtin-apply.o +BUILTIN_OBJS += builtin-archive.o +BUILTIN_OBJS += builtin-bisect--helper.o +BUILTIN_OBJS += builtin-blame.o +BUILTIN_OBJS += builtin-branch.o +BUILTIN_OBJS += builtin-bundle.o +BUILTIN_OBJS += builtin-cat-file.o +BUILTIN_OBJS += builtin-check-attr.o +BUILTIN_OBJS += builtin-check-ref-format.o +BUILTIN_OBJS += builtin-checkout-index.o +BUILTIN_OBJS += builtin-checkout.o +BUILTIN_OBJS += builtin-clean.o +BUILTIN_OBJS += builtin-clone.o +BUILTIN_OBJS += builtin-commit-tree.o +BUILTIN_OBJS += builtin-commit.o +BUILTIN_OBJS += builtin-config.o +BUILTIN_OBJS += builtin-count-objects.o +BUILTIN_OBJS += builtin-describe.o +BUILTIN_OBJS += builtin-diff-files.o +BUILTIN_OBJS += builtin-diff-index.o +BUILTIN_OBJS += builtin-diff-tree.o +BUILTIN_OBJS += builtin-diff.o +BUILTIN_OBJS += builtin-fast-export.o +BUILTIN_OBJS += builtin-fetch--tool.o +BUILTIN_OBJS += builtin-fetch-pack.o +BUILTIN_OBJS += builtin-fetch.o +BUILTIN_OBJS += builtin-fmt-merge-msg.o +BUILTIN_OBJS += builtin-for-each-ref.o +BUILTIN_OBJS += builtin-fsck.o +BUILTIN_OBJS += builtin-gc.o +BUILTIN_OBJS += builtin-grep.o +BUILTIN_OBJS += builtin-help.o +BUILTIN_OBJS += builtin-init-db.o +BUILTIN_OBJS += builtin-log.o +BUILTIN_OBJS += builtin-ls-files.o +BUILTIN_OBJS += builtin-ls-remote.o +BUILTIN_OBJS += builtin-ls-tree.o +BUILTIN_OBJS += builtin-mailinfo.o +BUILTIN_OBJS += builtin-mailsplit.o +BUILTIN_OBJS += builtin-merge.o +BUILTIN_OBJS += builtin-merge-base.o +BUILTIN_OBJS += builtin-merge-file.o +BUILTIN_OBJS += builtin-merge-ours.o +BUILTIN_OBJS += builtin-merge-recursive.o +BUILTIN_OBJS += builtin-mv.o +BUILTIN_OBJS += builtin-name-rev.o +BUILTIN_OBJS += builtin-pack-objects.o +BUILTIN_OBJS += builtin-pack-refs.o +BUILTIN_OBJS += builtin-prune-packed.o +BUILTIN_OBJS += builtin-prune.o +BUILTIN_OBJS += builtin-push.o +BUILTIN_OBJS += builtin-read-tree.o +BUILTIN_OBJS += builtin-receive-pack.o +BUILTIN_OBJS += builtin-reflog.o +BUILTIN_OBJS += builtin-remote.o +BUILTIN_OBJS += builtin-rerere.o +BUILTIN_OBJS += builtin-reset.o +BUILTIN_OBJS += builtin-rev-list.o +BUILTIN_OBJS += builtin-rev-parse.o +BUILTIN_OBJS += builtin-revert.o +BUILTIN_OBJS += builtin-rm.o +BUILTIN_OBJS += builtin-send-pack.o +BUILTIN_OBJS += builtin-shortlog.o +BUILTIN_OBJS += builtin-show-branch.o +BUILTIN_OBJS += builtin-show-ref.o +BUILTIN_OBJS += builtin-stripspace.o +BUILTIN_OBJS += builtin-symbolic-ref.o +BUILTIN_OBJS += builtin-tag.o +BUILTIN_OBJS += builtin-tar-tree.o +BUILTIN_OBJS += builtin-unpack-objects.o +BUILTIN_OBJS += builtin-update-index.o +BUILTIN_OBJS += builtin-update-ref.o +BUILTIN_OBJS += builtin-upload-archive.o +BUILTIN_OBJS += builtin-verify-pack.o +BUILTIN_OBJS += builtin-verify-tag.o +BUILTIN_OBJS += builtin-write-tree.o + +GITLIBS = $(LIB_FILE) $(XDIFF_LIB) +EXTLIBS = + +# +# Platform specific tweaks +# + +# We choose to avoid "if .. else if .. else .. endif endif" +# because maintaining the nesting to match is a pain. If +# we had "elif" things would have been much nicer... + +ifeq ($(uname_S),Linux) + NO_STRLCPY = YesPlease + THREADED_DELTA_SEARCH = YesPlease +endif +ifeq ($(uname_S),GNU/kFreeBSD) + NO_STRLCPY = YesPlease + THREADED_DELTA_SEARCH = YesPlease +endif +ifeq ($(uname_S),UnixWare) + CC = cc + NEEDS_SOCKET = YesPlease + NEEDS_NSL = YesPlease + NEEDS_SSL_WITH_CRYPTO = YesPlease + NEEDS_LIBICONV = YesPlease + SHELL_PATH = /usr/local/bin/bash + NO_IPV6 = YesPlease + NO_HSTRERROR = YesPlease + BASIC_CFLAGS += -Kthread + BASIC_CFLAGS += -I/usr/local/include + BASIC_LDFLAGS += -L/usr/local/lib + INSTALL = ginstall + TAR = gtar + NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease +endif +ifeq ($(uname_S),SCO_SV) + ifeq ($(uname_R),3.2) + CFLAGS = -O2 + endif + ifeq ($(uname_R),5) + CC = cc + BASIC_CFLAGS += -Kthread + endif + NEEDS_SOCKET = YesPlease + NEEDS_NSL = YesPlease + NEEDS_SSL_WITH_CRYPTO = YesPlease + NEEDS_LIBICONV = YesPlease + SHELL_PATH = /usr/bin/bash + NO_IPV6 = YesPlease + NO_HSTRERROR = YesPlease + BASIC_CFLAGS += -I/usr/local/include + BASIC_LDFLAGS += -L/usr/local/lib + NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease + INSTALL = ginstall + TAR = gtar +endif +ifeq ($(uname_S),Darwin) + NEEDS_SSL_WITH_CRYPTO = YesPlease + NEEDS_LIBICONV = YesPlease + ifeq ($(shell expr "$(uname_R)" : '[15678]\.'),2) + OLD_ICONV = UnfortunatelyYes + endif + ifeq ($(shell expr "$(uname_R)" : '[15]\.'),2) + NO_STRLCPY = YesPlease + endif + NO_MEMMEM = YesPlease + THREADED_DELTA_SEARCH = YesPlease + USE_ST_TIMESPEC = YesPlease +endif +ifeq ($(uname_S),SunOS) + NEEDS_SOCKET = YesPlease + NEEDS_NSL = YesPlease + SHELL_PATH = /bin/bash + NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease + NO_HSTRERROR = YesPlease + NO_MKDTEMP = YesPlease + OLD_ICONV = UnfortunatelyYes + ifeq ($(uname_R),5.8) + NO_UNSETENV = YesPlease + NO_SETENV = YesPlease + NO_C99_FORMAT = YesPlease + NO_STRTOUMAX = YesPlease + endif + ifeq ($(uname_R),5.9) + NO_UNSETENV = YesPlease + NO_SETENV = YesPlease + NO_C99_FORMAT = YesPlease + NO_STRTOUMAX = YesPlease + endif + INSTALL = ginstall + TAR = gtar + BASIC_CFLAGS += -D__EXTENSIONS__ +endif +ifeq ($(uname_O),Cygwin) + NO_D_TYPE_IN_DIRENT = YesPlease + NO_D_INO_IN_DIRENT = YesPlease + NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease + NO_SYMLINK_HEAD = YesPlease + NEEDS_LIBICONV = YesPlease + NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes + NO_TRUSTABLE_FILEMODE = UnfortunatelyYes + OLD_ICONV = UnfortunatelyYes + # There are conflicting reports about this. + # On some boxes NO_MMAP is needed, and not so elsewhere. + # Try commenting this out if you suspect MMAP is more efficient + NO_MMAP = YesPlease + NO_IPV6 = YesPlease + X = .exe +endif +ifeq ($(uname_S),FreeBSD) + NEEDS_LIBICONV = YesPlease + NO_MEMMEM = YesPlease + BASIC_CFLAGS += -I/usr/local/include + BASIC_LDFLAGS += -L/usr/local/lib + DIR_HAS_BSD_GROUP_SEMANTICS = YesPlease + USE_ST_TIMESPEC = YesPlease + THREADED_DELTA_SEARCH = YesPlease + ifeq ($(shell expr "$(uname_R)" : '4\.'),2) + PTHREAD_LIBS = -pthread + NO_UINTMAX_T = YesPlease + NO_STRTOUMAX = YesPlease + endif +endif +ifeq ($(uname_S),OpenBSD) + NO_STRCASESTR = YesPlease + NO_MEMMEM = YesPlease + NEEDS_LIBICONV = YesPlease + BASIC_CFLAGS += -I/usr/local/include + BASIC_LDFLAGS += -L/usr/local/lib + THREADED_DELTA_SEARCH = YesPlease +endif +ifeq ($(uname_S),NetBSD) + ifeq ($(shell expr "$(uname_R)" : '[01]\.'),2) + NEEDS_LIBICONV = YesPlease + endif + BASIC_CFLAGS += -I/usr/pkg/include + BASIC_LDFLAGS += -L/usr/pkg/lib $(CC_LD_DYNPATH)/usr/pkg/lib + THREADED_DELTA_SEARCH = YesPlease +endif +ifeq ($(uname_S),AIX) + NO_STRCASESTR=YesPlease + NO_MEMMEM = YesPlease + NO_MKDTEMP = YesPlease + NO_STRLCPY = YesPlease + NO_NSEC = YesPlease + FREAD_READS_DIRECTORIES = UnfortunatelyYes + INTERNAL_QSORT = UnfortunatelyYes + NEEDS_LIBICONV=YesPlease + BASIC_CFLAGS += -D_LARGE_FILES + ifneq ($(shell expr "$(uname_V)" : '[1234]'),1) + THREADED_DELTA_SEARCH = YesPlease + else + NO_PTHREADS = YesPlease + endif +endif +ifeq ($(uname_S),GNU) + # GNU/Hurd + NO_STRLCPY=YesPlease +endif +ifeq ($(uname_S),IRIX64) + NO_IPV6=YesPlease + NO_SETENV=YesPlease + NO_STRCASESTR=YesPlease + NO_MEMMEM = YesPlease + NO_STRLCPY = YesPlease + NO_SOCKADDR_STORAGE=YesPlease + SHELL_PATH=/usr/gnu/bin/bash + BASIC_CFLAGS += -DPATH_MAX=1024 + # for now, build 32-bit version + BASIC_LDFLAGS += -L/usr/lib32 +endif +ifeq ($(uname_S),HP-UX) + NO_IPV6=YesPlease + NO_SETENV=YesPlease + NO_STRCASESTR=YesPlease + NO_MEMMEM = YesPlease + NO_STRLCPY = YesPlease + NO_MKDTEMP = YesPlease + NO_UNSETENV = YesPlease + NO_HSTRERROR = YesPlease + NO_SYS_SELECT_H = YesPlease + SNPRINTF_RETURNS_BOGUS = YesPlease +endif +ifneq (,$(findstring CYGWIN,$(uname_S))) + COMPAT_OBJS += compat/cygwin.o +endif +ifneq (,$(findstring MINGW,$(uname_S))) + NO_PREAD = YesPlease + NO_OPENSSL = YesPlease + NO_CURL = YesPlease + NO_SYMLINK_HEAD = YesPlease + NO_IPV6 = YesPlease + NO_SETENV = YesPlease + NO_UNSETENV = YesPlease + NO_STRCASESTR = YesPlease + NO_STRLCPY = YesPlease + NO_MEMMEM = YesPlease + NO_PTHREADS = YesPlease + NEEDS_LIBICONV = YesPlease + OLD_ICONV = YesPlease + NO_C99_FORMAT = YesPlease + NO_STRTOUMAX = YesPlease + NO_MKDTEMP = YesPlease + SNPRINTF_RETURNS_BOGUS = YesPlease + NO_SVN_TESTS = YesPlease + NO_PERL_MAKEMAKER = YesPlease + RUNTIME_PREFIX = YesPlease + NO_POSIX_ONLY_PROGRAMS = YesPlease + NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease + NO_NSEC = YesPlease + USE_WIN32_MMAP = YesPlease + COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/regex -Icompat/fnmatch + COMPAT_CFLAGS += -DSNPRINTF_SIZE_CORR=1 + COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" + COMPAT_OBJS += compat/mingw.o compat/fnmatch/fnmatch.o compat/regex/regex.o compat/winansi.o + EXTLIBS += -lws2_32 + X = .exe +endif +ifneq (,$(findstring arm,$(uname_M))) + ARM_SHA1 = YesPlease +endif + +-include config.mak.autogen +-include config.mak + +ifeq ($(uname_S),Darwin) + ifndef NO_FINK + ifeq ($(shell test -d /sw/lib && echo y),y) + BASIC_CFLAGS += -I/sw/include + BASIC_LDFLAGS += -L/sw/lib + endif + endif + ifndef NO_DARWIN_PORTS + ifeq ($(shell test -d /opt/local/lib && echo y),y) + BASIC_CFLAGS += -I/opt/local/include + BASIC_LDFLAGS += -L/opt/local/lib + endif + endif + PTHREAD_LIBS = +endif + +ifndef CC_LD_DYNPATH + ifdef NO_R_TO_GCC_LINKER + # Some gcc does not accept and pass -R to the linker to specify + # the runtime dynamic library path. + CC_LD_DYNPATH = -Wl,-rpath, + else + CC_LD_DYNPATH = -R + endif +endif + +ifdef NO_CURL + BASIC_CFLAGS += -DNO_CURL +else + ifdef CURLDIR + # Try "-Wl,-rpath=$(CURLDIR)/$(lib)" in such a case. + BASIC_CFLAGS += -I$(CURLDIR)/include + CURL_LIBCURL = -L$(CURLDIR)/$(lib) $(CC_LD_DYNPATH)$(CURLDIR)/$(lib) -lcurl + else + CURL_LIBCURL = -lcurl + endif + BUILTIN_OBJS += builtin-http-fetch.o + EXTLIBS += $(CURL_LIBCURL) + LIB_OBJS += http.o http-walker.o + curl_check := $(shell (echo 070908; curl-config --vernum) | sort -r | sed -ne 2p) + ifeq "$(curl_check)" "070908" + ifndef NO_EXPAT + PROGRAMS += git-http-push$X + endif + endif + ifndef NO_EXPAT + ifdef EXPATDIR + BASIC_CFLAGS += -I$(EXPATDIR)/include + EXPAT_LIBEXPAT = -L$(EXPATDIR)/$(lib) $(CC_LD_DYNPATH)$(EXPATDIR)/$(lib) -lexpat + else + EXPAT_LIBEXPAT = -lexpat + endif + endif +endif + +ifdef ZLIB_PATH + BASIC_CFLAGS += -I$(ZLIB_PATH)/include + EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) +endif +EXTLIBS += -lz + +ifndef NO_POSIX_ONLY_PROGRAMS + PROGRAMS += git-daemon$X + PROGRAMS += git-imap-send$X +endif +ifndef NO_OPENSSL + OPENSSL_LIBSSL = -lssl + ifdef OPENSSLDIR + BASIC_CFLAGS += -I$(OPENSSLDIR)/include + OPENSSL_LINK = -L$(OPENSSLDIR)/$(lib) $(CC_LD_DYNPATH)$(OPENSSLDIR)/$(lib) + else + OPENSSL_LINK = + endif +else + BASIC_CFLAGS += -DNO_OPENSSL + MOZILLA_SHA1 = 1 + OPENSSL_LIBSSL = +endif +ifdef NEEDS_SSL_WITH_CRYPTO + LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto -lssl +else + LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto +endif +ifdef NEEDS_LIBICONV + ifdef ICONVDIR + BASIC_CFLAGS += -I$(ICONVDIR)/include + ICONV_LINK = -L$(ICONVDIR)/$(lib) $(CC_LD_DYNPATH)$(ICONVDIR)/$(lib) + else + ICONV_LINK = + endif + EXTLIBS += $(ICONV_LINK) -liconv +endif +ifdef NEEDS_SOCKET + EXTLIBS += -lsocket +endif +ifdef NEEDS_NSL + EXTLIBS += -lnsl +endif +ifdef NO_D_TYPE_IN_DIRENT + BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT +endif +ifdef NO_D_INO_IN_DIRENT + BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT +endif +ifdef NO_ST_BLOCKS_IN_STRUCT_STAT + BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT +endif +ifdef USE_NSEC + BASIC_CFLAGS += -DUSE_NSEC +endif +ifdef USE_ST_TIMESPEC + BASIC_CFLAGS += -DUSE_ST_TIMESPEC +endif +ifdef NO_NSEC + BASIC_CFLAGS += -DNO_NSEC +endif +ifdef NO_C99_FORMAT + BASIC_CFLAGS += -DNO_C99_FORMAT +endif +ifdef SNPRINTF_RETURNS_BOGUS + COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS + COMPAT_OBJS += compat/snprintf.o +endif +ifdef FREAD_READS_DIRECTORIES + COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES + COMPAT_OBJS += compat/fopen.o +endif +ifdef NO_SYMLINK_HEAD + BASIC_CFLAGS += -DNO_SYMLINK_HEAD +endif +ifdef NO_STRCASESTR + COMPAT_CFLAGS += -DNO_STRCASESTR + COMPAT_OBJS += compat/strcasestr.o +endif +ifdef NO_STRLCPY + COMPAT_CFLAGS += -DNO_STRLCPY + COMPAT_OBJS += compat/strlcpy.o +endif +ifdef NO_STRTOUMAX + COMPAT_CFLAGS += -DNO_STRTOUMAX + COMPAT_OBJS += compat/strtoumax.o +endif +ifdef NO_STRTOULL + COMPAT_CFLAGS += -DNO_STRTOULL +endif +ifdef NO_SETENV + COMPAT_CFLAGS += -DNO_SETENV + COMPAT_OBJS += compat/setenv.o +endif +ifdef NO_MKDTEMP + COMPAT_CFLAGS += -DNO_MKDTEMP + COMPAT_OBJS += compat/mkdtemp.o +endif +ifdef NO_UNSETENV + COMPAT_CFLAGS += -DNO_UNSETENV + COMPAT_OBJS += compat/unsetenv.o +endif +ifdef NO_SYS_SELECT_H + BASIC_CFLAGS += -DNO_SYS_SELECT_H +endif +ifdef NO_MMAP + COMPAT_CFLAGS += -DNO_MMAP + COMPAT_OBJS += compat/mmap.o +else + ifdef USE_WIN32_MMAP + COMPAT_CFLAGS += -DUSE_WIN32_MMAP + COMPAT_OBJS += compat/win32mmap.o + endif +endif +ifdef NO_PREAD + COMPAT_CFLAGS += -DNO_PREAD + COMPAT_OBJS += compat/pread.o +endif +ifdef NO_FAST_WORKING_DIRECTORY + BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY +endif +ifdef NO_TRUSTABLE_FILEMODE + BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE +endif +ifdef NO_IPV6 + BASIC_CFLAGS += -DNO_IPV6 +endif +ifdef NO_UINTMAX_T + BASIC_CFLAGS += -Duintmax_t=uint32_t +endif +ifdef NO_SOCKADDR_STORAGE +ifdef NO_IPV6 + BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in +else + BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6 +endif +endif +ifdef NO_INET_NTOP + LIB_OBJS += compat/inet_ntop.o +endif +ifdef NO_INET_PTON + LIB_OBJS += compat/inet_pton.o +endif + +ifdef NO_ICONV + BASIC_CFLAGS += -DNO_ICONV +endif + +ifdef OLD_ICONV + BASIC_CFLAGS += -DOLD_ICONV +endif + +ifdef NO_DEFLATE_BOUND + BASIC_CFLAGS += -DNO_DEFLATE_BOUND +endif + +ifdef PPC_SHA1 + SHA1_HEADER = "ppc/sha1.h" + LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o +else +ifdef ARM_SHA1 + SHA1_HEADER = "arm/sha1.h" + LIB_OBJS += arm/sha1.o arm/sha1_arm.o +else +ifdef MOZILLA_SHA1 + SHA1_HEADER = "mozilla-sha1/sha1.h" + LIB_OBJS += mozilla-sha1/sha1.o +else + SHA1_HEADER = + EXTLIBS += $(LIB_4_CRYPTO) +endif +endif +endif +ifdef NO_PERL_MAKEMAKER + export NO_PERL_MAKEMAKER +endif +ifdef NO_HSTRERROR + COMPAT_CFLAGS += -DNO_HSTRERROR + COMPAT_OBJS += compat/hstrerror.o +endif +ifdef NO_MEMMEM + COMPAT_CFLAGS += -DNO_MEMMEM + COMPAT_OBJS += compat/memmem.o +endif +ifdef INTERNAL_QSORT + COMPAT_CFLAGS += -DINTERNAL_QSORT + COMPAT_OBJS += compat/qsort.o +endif +ifdef RUNTIME_PREFIX + COMPAT_CFLAGS += -DRUNTIME_PREFIX +endif + +ifdef NO_PTHREADS + THREADED_DELTA_SEARCH = + BASIC_CFLAGS += -DNO_PTHREADS +else + EXTLIBS += $(PTHREAD_LIBS) +endif + +ifdef THREADED_DELTA_SEARCH + BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH + LIB_OBJS += thread-utils.o +endif +ifdef DIR_HAS_BSD_GROUP_SEMANTICS + COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS +endif +ifdef NO_EXTERNAL_GREP + BASIC_CFLAGS += -DNO_EXTERNAL_GREP +endif + +ifeq ($(TCLTK_PATH),) +NO_TCLTK=NoThanks +endif + +ifeq ($(PERL_PATH),) +NO_PERL=NoThanks +endif + +QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir +QUIET_SUBDIR1 = + +ifneq ($(findstring $(MAKEFLAGS),w),w) +PRINT_DIR = --no-print-directory +else # "make -w" +NO_SUBDIR = : +endif + +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifndef V + QUIET_CC = @echo ' ' CC $@; + QUIET_AR = @echo ' ' AR $@; + QUIET_LINK = @echo ' ' LINK $@; + QUIET_BUILT_IN = @echo ' ' BUILTIN $@; + QUIET_GEN = @echo ' ' GEN $@; + QUIET_SUBDIR0 = +@subdir= + QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ + $(MAKE) $(PRINT_DIR) -C $$subdir + export V + export QUIET_GEN + export QUIET_BUILT_IN +endif +endif + +ifdef ASCIIDOC8 + export ASCIIDOC8 +endif + +# Shell quote (do not use $(call) to accommodate ancient setups); + +SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER)) +ETC_GITCONFIG_SQ = $(subst ','\'',$(ETC_GITCONFIG)) + +DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) +bindir_SQ = $(subst ','\'',$(bindir)) +bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) +mandir_SQ = $(subst ','\'',$(mandir)) +infodir_SQ = $(subst ','\'',$(infodir)) +gitexecdir_SQ = $(subst ','\'',$(gitexecdir)) +template_dir_SQ = $(subst ','\'',$(template_dir)) +htmldir_SQ = $(subst ','\'',$(htmldir)) +prefix_SQ = $(subst ','\'',$(prefix)) + +SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) +PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) +TCLTK_PATH_SQ = $(subst ','\'',$(TCLTK_PATH)) + +LIBS = $(GITLIBS) $(EXTLIBS) + +BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ + $(COMPAT_CFLAGS) +LIB_OBJS += $(COMPAT_OBJS) + +ALL_CFLAGS += $(BASIC_CFLAGS) +ALL_LDFLAGS += $(BASIC_LDFLAGS) + +export TAR INSTALL DESTDIR SHELL_PATH + + +### Build rules + +SHELL = $(SHELL_PATH) + +all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) GIT-BUILD-OPTIONS +ifneq (,$X) + $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), test '$p' -ef '$p$X' || $(RM) '$p';) +endif + +all:: +ifndef NO_TCLTK + $(QUIET_SUBDIR0)git-gui $(QUIET_SUBDIR1) gitexecdir='$(gitexec_instdir_SQ)' all + $(QUIET_SUBDIR0)gitk-git $(QUIET_SUBDIR1) all +endif +ifndef NO_PERL + $(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' all +endif + $(QUIET_SUBDIR0)templates $(QUIET_SUBDIR1) + +please_set_SHELL_PATH_to_a_more_modern_shell: + @$$(:) + +shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell + +strip: $(PROGRAMS) git$X + $(STRIP) $(STRIP_OPTS) $(PROGRAMS) git$X + +git.o: git.c common-cmds.h GIT-CFLAGS + $(QUIET_CC)$(CC) -DGIT_VERSION='"$(GIT_VERSION)"' \ + '-DGIT_HTML_PATH="$(htmldir_SQ)"' \ + $(ALL_CFLAGS) -c $(filter %.c,$^) + +git$X: git.o $(BUILTIN_OBJS) $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ git.o \ + $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) + +builtin-help.o: builtin-help.c common-cmds.h GIT-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ + '-DGIT_HTML_PATH="$(htmldir_SQ)"' \ + '-DGIT_MAN_PATH="$(mandir_SQ)"' \ + '-DGIT_INFO_PATH="$(infodir_SQ)"' $< + +$(BUILT_INS): git$X + $(QUIET_BUILT_IN)$(RM) $@ && \ + ln git$X $@ 2>/dev/null || \ + ln -s git$X $@ 2>/dev/null || \ + cp git$X $@ + +common-cmds.h: ./generate-cmdlist.sh command-list.txt + +common-cmds.h: $(wildcard Documentation/git-*.txt) + $(QUIET_GEN)./generate-cmdlist.sh > $@+ && mv $@+ $@ + +$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh + $(QUIET_GEN)$(RM) $@ $@+ && \ + sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ + -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ + -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ + -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ + -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ + $@.sh >$@+ && \ + chmod +x $@+ && \ + mv $@+ $@ + +ifndef NO_PERL +$(patsubst %.perl,%,$(SCRIPT_PERL)): perl/perl.mak + +perl/perl.mak: GIT-CFLAGS perl/Makefile perl/Makefile.PL + $(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' $(@F) + +$(patsubst %.perl,%,$(SCRIPT_PERL)): % : %.perl + $(QUIET_GEN)$(RM) $@ $@+ && \ + INSTLIBDIR=`MAKEFLAGS= $(MAKE) -C perl -s --no-print-directory instlibdir` && \ + sed -e '1{' \ + -e ' s|#!.*perl|#!$(PERL_PATH_SQ)|' \ + -e ' h' \ + -e ' s=.*=use lib (split(/:/, $$ENV{GITPERLLIB} || "@@INSTLIBDIR@@"));=' \ + -e ' H' \ + -e ' x' \ + -e '}' \ + -e 's|@@INSTLIBDIR@@|'"$$INSTLIBDIR"'|g' \ + -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ + $@.perl >$@+ && \ + chmod +x $@+ && \ + mv $@+ $@ + +gitweb/gitweb.cgi: gitweb/gitweb.perl + $(QUIET_GEN)$(RM) $@ $@+ && \ + sed -e '1s|#!.*perl|#!$(PERL_PATH_SQ)|' \ + -e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \ + -e 's|++GIT_BINDIR++|$(bindir)|g' \ + -e 's|++GITWEB_CONFIG++|$(GITWEB_CONFIG)|g' \ + -e 's|++GITWEB_CONFIG_SYSTEM++|$(GITWEB_CONFIG_SYSTEM)|g' \ + -e 's|++GITWEB_HOME_LINK_STR++|$(GITWEB_HOME_LINK_STR)|g' \ + -e 's|++GITWEB_SITENAME++|$(GITWEB_SITENAME)|g' \ + -e 's|++GITWEB_PROJECTROOT++|$(GITWEB_PROJECTROOT)|g' \ + -e 's|"++GITWEB_PROJECT_MAXDEPTH++"|$(GITWEB_PROJECT_MAXDEPTH)|g' \ + -e 's|++GITWEB_EXPORT_OK++|$(GITWEB_EXPORT_OK)|g' \ + -e 's|++GITWEB_STRICT_EXPORT++|$(GITWEB_STRICT_EXPORT)|g' \ + -e 's|++GITWEB_BASE_URL++|$(GITWEB_BASE_URL)|g' \ + -e 's|++GITWEB_LIST++|$(GITWEB_LIST)|g' \ + -e 's|++GITWEB_HOMETEXT++|$(GITWEB_HOMETEXT)|g' \ + -e 's|++GITWEB_CSS++|$(GITWEB_CSS)|g' \ + -e 's|++GITWEB_LOGO++|$(GITWEB_LOGO)|g' \ + -e 's|++GITWEB_FAVICON++|$(GITWEB_FAVICON)|g' \ + -e 's|++GITWEB_SITE_HEADER++|$(GITWEB_SITE_HEADER)|g' \ + -e 's|++GITWEB_SITE_FOOTER++|$(GITWEB_SITE_FOOTER)|g' \ + $< >$@+ && \ + chmod +x $@+ && \ + mv $@+ $@ + +git-instaweb: git-instaweb.sh gitweb/gitweb.cgi gitweb/gitweb.css + $(QUIET_GEN)$(RM) $@ $@+ && \ + sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ + -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ + -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ + -e '/@@GITWEB_CGI@@/r gitweb/gitweb.cgi' \ + -e '/@@GITWEB_CGI@@/d' \ + -e '/@@GITWEB_CSS@@/r gitweb/gitweb.css' \ + -e '/@@GITWEB_CSS@@/d' \ + -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ + $@.sh > $@+ && \ + chmod +x $@+ && \ + mv $@+ $@ +else # NO_PERL +$(patsubst %.perl,%,$(SCRIPT_PERL)) git-instaweb: % : unimplemented.sh + $(QUIET_GEN)$(RM) $@ $@+ && \ + sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ + -e 's|@@REASON@@|NO_PERL=$(NO_PERL)|g' \ + unimplemented.sh >$@+ && \ + chmod +x $@+ && \ + mv $@+ $@ +endif # NO_PERL + +configure: configure.ac + $(QUIET_GEN)$(RM) $@ $<+ && \ + sed -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ + $< > $<+ && \ + autoconf -o $@ $<+ && \ + $(RM) $<+ + +# These can record GIT_VERSION +git.o git.spec \ + $(patsubst %.sh,%,$(SCRIPT_SH)) \ + $(patsubst %.perl,%,$(SCRIPT_PERL)) \ + : GIT-VERSION-FILE + +%.o: %.c GIT-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< +%.s: %.c GIT-CFLAGS + $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< +%.o: %.S + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< + +exec_cmd.o: exec_cmd.c GIT-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ + '-DGIT_EXEC_PATH="$(gitexecdir_SQ)"' \ + '-DBINDIR="$(bindir_relative_SQ)"' \ + '-DPREFIX="$(prefix_SQ)"' \ + $< + +builtin-init-db.o: builtin-init-db.c GIT-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $< + +config.o: config.c GIT-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_GITCONFIG='"$(ETC_GITCONFIG_SQ)"' $< + +http.o: http.c GIT-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DGIT_USER_AGENT='"git/$(GIT_VERSION)"' $< + +ifdef NO_EXPAT +http-walker.o: http-walker.c http.h GIT-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DNO_EXPAT $< +endif + +git-%$X: %.o $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + +git-imap-send$X: imap-send.o $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ + $(LIBS) $(OPENSSL_LINK) $(OPENSSL_LIBSSL) + +http.o http-walker.o http-push.o transport.o: http.h + +git-http-push$X: revision.o http.o http-push.o $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ + $(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) + +$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) +$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +builtin-revert.o wt-status.o: wt-status.h + +$(LIB_FILE): $(LIB_OBJS) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) + +XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ + xdiff/xmerge.o xdiff/xpatience.o +$(XDIFF_OBJS): xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ + xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h + +$(XDIFF_LIB): $(XDIFF_OBJS) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS) + + +doc: + $(MAKE) -C Documentation all + +man: + $(MAKE) -C Documentation man + +html: + $(MAKE) -C Documentation html + +info: + $(MAKE) -C Documentation info + +pdf: + $(MAKE) -C Documentation pdf + +TAGS: + $(RM) TAGS + $(FIND) . -name '*.[hcS]' -print | xargs etags -a + +tags: + $(RM) tags + $(FIND) . -name '*.[hcS]' -print | xargs ctags -a + +cscope: + $(RM) cscope* + $(FIND) . -name '*.[hcS]' -print | xargs cscope -b + +### Detect prefix changes +TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\ + $(bindir_SQ):$(gitexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) + +GIT-CFLAGS: .FORCE-GIT-CFLAGS + @FLAGS='$(TRACK_CFLAGS)'; \ + if test x"$$FLAGS" != x"`cat GIT-CFLAGS 2>/dev/null`" ; then \ + echo 1>&2 " * new build flags or prefix"; \ + echo "$$FLAGS" >GIT-CFLAGS; \ + fi + +# We need to apply sq twice, once to protect from the shell +# that runs GIT-BUILD-OPTIONS, and then again to protect it +# and the first level quoting from the shell that runs "echo". +GIT-BUILD-OPTIONS: .FORCE-GIT-BUILD-OPTIONS + @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ + @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ + @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ + @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ + +### Detect Tck/Tk interpreter path changes +ifndef NO_TCLTK +TRACK_VARS = $(subst ','\'',-DTCLTK_PATH='$(TCLTK_PATH_SQ)') + +GIT-GUI-VARS: .FORCE-GIT-GUI-VARS + @VARS='$(TRACK_VARS)'; \ + if test x"$$VARS" != x"`cat $@ 2>/dev/null`" ; then \ + echo 1>&2 " * new Tcl/Tk interpreter location"; \ + echo "$$VARS" >$@; \ + fi + +.PHONY: .FORCE-GIT-GUI-VARS +endif + +### Testing rules + +TEST_PROGRAMS += test-chmtime$X +TEST_PROGRAMS += test-ctype$X +TEST_PROGRAMS += test-date$X +TEST_PROGRAMS += test-delta$X +TEST_PROGRAMS += test-dump-cache-tree$X +TEST_PROGRAMS += test-genrandom$X +TEST_PROGRAMS += test-match-trees$X +TEST_PROGRAMS += test-parse-options$X +TEST_PROGRAMS += test-path-utils$X +TEST_PROGRAMS += test-sha1$X +TEST_PROGRAMS += test-sigchain$X + +all:: $(TEST_PROGRAMS) + +# GNU make supports exporting all variables by "export" without parameters. +# However, the environment gets quite big, and some programs have problems +# with that. + +export NO_SVN_TESTS + +test: all + $(MAKE) -C t/ all + +test-ctype$X: ctype.o + +test-date$X: date.o ctype.o + +test-delta$X: diff-delta.o patch-delta.o + +test-parse-options$X: parse-options.o + +.PRECIOUS: $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS)) + +test-%$X: test-%.o $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) + +check-sha1:: test-sha1$X + ./test-sha1.sh + +check: common-cmds.h + if sparse; \ + then \ + for i in *.c; \ + do \ + sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ + done; \ + else \ + echo 2>&1 "Did you mean 'make test'?"; \ + exit 1; \ + fi + +remove-dashes: + ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS) + +### Installation rules + +ifneq ($(filter /%,$(firstword $(template_dir))),) +template_instdir = $(template_dir) +else +template_instdir = $(prefix)/$(template_dir) +endif +export template_instdir + +ifneq ($(filter /%,$(firstword $(gitexecdir))),) +gitexec_instdir = $(gitexecdir) +else +gitexec_instdir = $(prefix)/$(gitexecdir) +endif +gitexec_instdir_SQ = $(subst ','\'',$(gitexec_instdir)) +export gitexec_instdir + +install: all + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' + $(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' + $(INSTALL) git$X git-upload-pack$X git-receive-pack$X git-upload-archive$X git-shell$X git-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)' + $(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install + $(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install +ifndef NO_TCLTK + $(MAKE) -C gitk-git install + $(MAKE) -C git-gui gitexecdir='$(gitexec_instdir_SQ)' install +endif +ifneq (,$X) + $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';) +endif + bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \ + execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \ + { $(RM) "$$execdir/git-add$X" && \ + ln "$$bindir/git$X" "$$execdir/git-add$X" 2>/dev/null || \ + cp "$$bindir/git$X" "$$execdir/git-add$X"; } && \ + { for p in $(filter-out git-add$X,$(BUILT_INS)); do \ + $(RM) "$$execdir/$$p" && \ + ln "$$execdir/git-add$X" "$$execdir/$$p" 2>/dev/null || \ + ln -s "git-add$X" "$$execdir/$$p" 2>/dev/null || \ + cp "$$execdir/git-add$X" "$$execdir/$$p" || exit; \ + done } && \ + ./check_bindir "z$$bindir" "z$$execdir" "$$bindir/git-add$X" + +install-doc: + $(MAKE) -C Documentation install + +install-man: + $(MAKE) -C Documentation install-man + +install-html: + $(MAKE) -C Documentation install-html + +install-info: + $(MAKE) -C Documentation install-info + +install-pdf: + $(MAKE) -C Documentation install-pdf + +quick-install-doc: + $(MAKE) -C Documentation quick-install + +quick-install-man: + $(MAKE) -C Documentation quick-install-man + +quick-install-html: + $(MAKE) -C Documentation quick-install-html + + + +### Maintainer's dist rules + +git.spec: git.spec.in + sed -e 's/@@VERSION@@/$(GIT_VERSION)/g' < $< > $@+ + mv $@+ $@ + +GIT_TARNAME=git-$(GIT_VERSION) +dist: git.spec git-archive$(X) configure + ./git-archive --format=tar \ + --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar + @mkdir -p $(GIT_TARNAME) + @cp git.spec configure $(GIT_TARNAME) + @echo $(GIT_VERSION) > $(GIT_TARNAME)/version + @$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version + $(TAR) rf $(GIT_TARNAME).tar \ + $(GIT_TARNAME)/git.spec \ + $(GIT_TARNAME)/configure \ + $(GIT_TARNAME)/version \ + $(GIT_TARNAME)/git-gui/version + @$(RM) -r $(GIT_TARNAME) + gzip -f -9 $(GIT_TARNAME).tar + +rpm: dist + $(RPMBUILD) -ta $(GIT_TARNAME).tar.gz + +htmldocs = git-htmldocs-$(GIT_VERSION) +manpages = git-manpages-$(GIT_VERSION) +dist-doc: + $(RM) -r .doc-tmp-dir + mkdir .doc-tmp-dir + $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc + cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar . + gzip -n -9 -f $(htmldocs).tar + : + $(RM) -r .doc-tmp-dir + mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7 + $(MAKE) -C Documentation DESTDIR=./ \ + man1dir=../.doc-tmp-dir/man1 \ + man5dir=../.doc-tmp-dir/man5 \ + man7dir=../.doc-tmp-dir/man7 \ + install + cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar . + gzip -n -9 -f $(manpages).tar + $(RM) -r .doc-tmp-dir + +### Cleaning rules + +distclean: clean + $(RM) configure clean: - rm $(BINS) + $(RM) *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \ + $(LIB_FILE) $(XDIFF_LIB) + $(RM) $(ALL_PROGRAMS) $(BUILT_INS) git$X + $(RM) $(TEST_PROGRAMS) + $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* + $(RM) -r autom4te.cache + $(RM) config.log config.mak.autogen config.mak.append config.status config.cache + $(RM) -r $(GIT_TARNAME) .doc-tmp-dir + $(RM) $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz + $(RM) $(htmldocs).tar.gz $(manpages).tar.gz + $(MAKE) -C Documentation/ clean +ifndef NO_PERL + $(RM) gitweb/gitweb.cgi + $(MAKE) -C perl clean +endif + $(MAKE) -C templates/ clean + $(MAKE) -C t/ clean +ifndef NO_TCLTK + $(MAKE) -C gitk-git clean + $(MAKE) -C git-gui clean +endif + $(RM) GIT-VERSION-FILE GIT-CFLAGS GIT-GUI-VARS GIT-BUILD-OPTIONS + +.PHONY: all install clean strip +.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell +.PHONY: .FORCE-GIT-VERSION-FILE TAGS tags cscope .FORCE-GIT-CFLAGS +.PHONY: .FORCE-GIT-BUILD-OPTIONS + +### Check documentation +# +check-docs:: + @(for v in $(ALL_PROGRAMS) $(BUILT_INS) git gitk; \ + do \ + case "$$v" in \ + git-merge-octopus | git-merge-ours | git-merge-recursive | \ + git-merge-resolve | git-merge-subtree | \ + git-fsck-objects | git-init-db | \ + git-?*--?* ) continue ;; \ + esac ; \ + test -f "Documentation/$$v.txt" || \ + echo "no doc: $$v"; \ + sed -e '/^#/d' command-list.txt | \ + grep -q "^$$v[ ]" || \ + case "$$v" in \ + git) ;; \ + *) echo "no link: $$v";; \ + esac ; \ + done; \ + ( \ + sed -e '/^#/d' \ + -e 's/[ ].*//' \ + -e 's/^/listed /' command-list.txt; \ + ls -1 Documentation/git*txt | \ + sed -e 's|Documentation/|documented |' \ + -e 's/\.txt//'; \ + ) | while read how cmd; \ + do \ + case "$$how,$$cmd" in \ + *,git-citool | \ + *,git-gui | \ + *,git-help | \ + documented,gitattributes | \ + documented,gitignore | \ + documented,gitmodules | \ + documented,gitcli | \ + documented,git-tools | \ + documented,gitcore-tutorial | \ + documented,gitcvs-migration | \ + documented,gitdiffcore | \ + documented,gitglossary | \ + documented,githooks | \ + documented,gitrepository-layout | \ + documented,gittutorial | \ + documented,gittutorial-2 | \ + sentinel,not,matching,is,ok ) continue ;; \ + esac; \ + case " $(ALL_PROGRAMS) $(BUILT_INS) git gitk " in \ + *" $$cmd "*) ;; \ + *) echo "removed but $$how: $$cmd" ;; \ + esac; \ + done ) | sort + +### Make sure built-ins do not have dups and listed in git.c +# +check-builtins:: + ./check-builtins.sh + +### Test suite coverage testing +# +.PHONY: coverage coverage-clean coverage-build coverage-report + +coverage: + $(MAKE) coverage-build + $(MAKE) coverage-report + +coverage-clean: + rm -f *.gcda *.gcno + +COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs +COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov + +coverage-build: coverage-clean + $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all + $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \ + -j1 test + +coverage-report: + gcov -b *.c + grep '^function.*called 0 ' *.c.gcov \ + | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \ + | tee coverage-untested-functions -- cgit v0.10.2 From 0780060124011b94af55830939c86cc0916be0f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 15:00:56 +0200 Subject: perf_counter tools: add in basic glue from Git First very raw version at having a central 'perf' command and a list of subcommands: perf top perf stat perf record perf report ... This is done by picking up Git's collection of utility functions, and hacking them to build fine in this new environment. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/.gitignore b/Documentation/perf_counter/.gitignore new file mode 100644 index 0000000..41c0b20 --- /dev/null +++ b/Documentation/perf_counter/.gitignore @@ -0,0 +1,179 @@ +GIT-BUILD-OPTIONS +GIT-CFLAGS +GIT-GUI-VARS +GIT-VERSION-FILE +git +git-add +git-add--interactive +git-am +git-annotate +git-apply +git-archimport +git-archive +git-bisect +git-bisect--helper +git-blame +git-branch +git-bundle +git-cat-file +git-check-attr +git-check-ref-format +git-checkout +git-checkout-index +git-cherry +git-cherry-pick +git-clean +git-clone +git-commit +git-commit-tree +git-config +git-count-objects +git-cvsexportcommit +git-cvsimport +git-cvsserver +git-daemon +git-diff +git-diff-files +git-diff-index +git-diff-tree +git-difftool +git-difftool--helper +git-describe +git-fast-export +git-fast-import +git-fetch +git-fetch--tool +git-fetch-pack +git-filter-branch +git-fmt-merge-msg +git-for-each-ref +git-format-patch +git-fsck +git-fsck-objects +git-gc +git-get-tar-commit-id +git-grep +git-hash-object +git-help +git-http-fetch +git-http-push +git-imap-send +git-index-pack +git-init +git-init-db +git-instaweb +git-log +git-lost-found +git-ls-files +git-ls-remote +git-ls-tree +git-mailinfo +git-mailsplit +git-merge +git-merge-base +git-merge-index +git-merge-file +git-merge-tree +git-merge-octopus +git-merge-one-file +git-merge-ours +git-merge-recursive +git-merge-resolve +git-merge-subtree +git-mergetool +git-mergetool--lib +git-mktag +git-mktree +git-name-rev +git-mv +git-pack-redundant +git-pack-objects +git-pack-refs +git-parse-remote +git-patch-id +git-peek-remote +git-prune +git-prune-packed +git-pull +git-push +git-quiltimport +git-read-tree +git-rebase +git-rebase--interactive +git-receive-pack +git-reflog +git-relink +git-remote +git-repack +git-repo-config +git-request-pull +git-rerere +git-reset +git-rev-list +git-rev-parse +git-revert +git-rm +git-send-email +git-send-pack +git-sh-setup +git-shell +git-shortlog +git-show +git-show-branch +git-show-index +git-show-ref +git-stage +git-stash +git-status +git-stripspace +git-submodule +git-svn +git-symbolic-ref +git-tag +git-tar-tree +git-unpack-file +git-unpack-objects +git-update-index +git-update-ref +git-update-server-info +git-upload-archive +git-upload-pack +git-var +git-verify-pack +git-verify-tag +git-web--browse +git-whatchanged +git-write-tree +git-core-*/?* +gitk-wish +gitweb/gitweb.cgi +test-chmtime +test-ctype +test-date +test-delta +test-dump-cache-tree +test-genrandom +test-match-trees +test-parse-options +test-path-utils +test-sha1 +test-sigchain +common-cmds.h +*.tar.gz +*.dsc +*.deb +git.spec +*.exe +*.[aos] +*.py[co] +config.mak +autom4te.cache +config.cache +config.log +config.status +config.mak.autogen +config.mak.append +configure +tags +TAGS +cscope* diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 6e0838b..11809b9 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -13,16 +13,9 @@ all:: # Define NO_OPENSSL environment variable if you do not have OpenSSL. # This also implies MOZILLA_SHA1. # -# Define NO_CURL if you do not have libcurl installed. git-http-pull and -# git-http-push are not built, and you cannot use http:// and https:// -# transports. -# # Define CURLDIR=/foo/bar if your curl header and library files are in # /foo/bar/include and /foo/bar/lib directories. # -# Define NO_EXPAT if you do not have expat installed. git-http-push is -# not built, and you cannot push using http:// and https:// transports. -# # Define EXPATDIR=/foo/bar if your expat header and library files are in # /foo/bar/include and /foo/bar/lib directories. # @@ -40,8 +33,6 @@ all:: # # Define NO_MEMMEM if you don't have memmem. # -# Define NO_STRLCPY if you don't have strlcpy. -# # Define NO_STRTOUMAX if you don't have strtoumax in the C library. # If your compiler also does not support long long or does not have # strtoull, define NO_STRTOULL. @@ -54,7 +45,7 @@ all:: # # Define NO_SYS_SELECT_H if you don't have sys/select.h. # -# Define NO_SYMLINK_HEAD if you never want .git/HEAD to be a symbolic link. +# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link. # Enable it on Windows. By default, symrefs are still used. # # Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability @@ -62,13 +53,13 @@ all:: # but are not needed unless you plan to talk to SVN repos. # # Define NO_FINK if you are building on Darwin/Mac OS X, have Fink -# installed in /sw, but don't want GIT to link against any libraries +# installed in /sw, but don't want PERF to link against any libraries # installed there. If defined you may specify your own (or Fink's) # include directories and library directories by defining CFLAGS # and LDFLAGS appropriately. # # Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X, -# have DarwinPorts installed in /opt/local, but don't want GIT to +# have DarwinPorts installed in /opt/local, but don't want PERF to # link against any libraries installed there. If defined you may # specify your own (or DarwinPort's) include directories and # library directories by defining CFLAGS and LDFLAGS appropriately. @@ -120,7 +111,7 @@ all:: # that tells runtime paths to dynamic libraries; # "-Wl,-rpath=/path/lib" is used instead. # -# Define USE_NSEC below if you want git to care about sub-second file mtimes +# Define USE_NSEC below if you want perf to care about sub-second file mtimes # and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and # it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely # randomly break unless your underlying filesystem supports those sub-second @@ -132,7 +123,7 @@ all:: # Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" # available. This automatically turns USE_NSEC off. # -# Define USE_STDEV below if you want git to care about the underlying device +# Define USE_STDEV below if you want perf to care about the underlying device # change being considered an inode change from the update-index perspective. # # Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks @@ -150,27 +141,24 @@ all:: # Define NO_TCLTK if you do not want Tcl/Tk GUI. # # The TCL_PATH variable governs the location of the Tcl interpreter -# used to optimize git-gui for your system. Only used if NO_TCLTK +# used to optimize perf-gui for your system. Only used if NO_TCLTK # is not set. Defaults to the bare 'tclsh'. # # The TCLTK_PATH variable governs the location of the Tcl/Tk interpreter. # If not set it defaults to the bare 'wish'. If it is set to the empty # string then NO_TCLTK will be forced (this is used by configure script). # -# Define THREADED_DELTA_SEARCH if you have pthreads and wish to exploit -# parallel delta searching when packing objects. -# # Define INTERNAL_QSORT to use Git's implementation of qsort(), which # is a simplified version of the merge sort used in glibc. This is # recommended if Git triggers O(n^2) behavior in your platform's qsort(). # -# Define NO_EXTERNAL_GREP if you don't want "git grep" to ever call +# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call # your external grep (e.g., if your system lacks grep, if its grep is -# broken, or spawning external process is slower than built-in grep git has). +# broken, or spawning external process is slower than built-in grep perf has). -GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE - @$(SHELL_PATH) ./GIT-VERSION-GEN --include GIT-VERSION-FILE +PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE + @$(SHELL_PATH) ./PERF-VERSION-GEN +-include PERF-VERSION-FILE uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') @@ -182,20 +170,20 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') # CFLAGS and LDFLAGS are for the users to override from the command line. CFLAGS = -g -O2 -Wall -LDFLAGS = +LDFLAGS = -lpthread -lrt ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip # Among the variables below, these: -# gitexecdir +# perfexecdir # template_dir # mandir # infodir # htmldir -# ETC_GITCONFIG (but not sysconfdir) +# ETC_PERFCONFIG (but not sysconfdir) # can be specified as a relative path some/where/else; -# this is interpreted as relative to $(prefix) and "git" at +# this is interpreted as relative to $(prefix) and "perf" at # runtime figures out where they are based on the path to the executable. # This can help installing the suite in a relocatable way. @@ -204,38 +192,20 @@ bindir_relative = bin bindir = $(prefix)/$(bindir_relative) mandir = share/man infodir = share/info -gitexecdir = libexec/git-core +perfexecdir = libexec/perf-core sharedir = $(prefix)/share -template_dir = share/git-core/templates -htmldir = share/doc/git-doc +template_dir = share/perf-core/templates +htmldir = share/doc/perf-doc ifeq ($(prefix),/usr) sysconfdir = /etc -ETC_GITCONFIG = $(sysconfdir)/gitconfig +ETC_PERFCONFIG = $(sysconfdir)/perfconfig else sysconfdir = $(prefix)/etc -ETC_GITCONFIG = etc/gitconfig +ETC_PERFCONFIG = etc/perfconfig endif lib = lib # DESTDIR= -# default configuration for gitweb -GITWEB_CONFIG = gitweb_config.perl -GITWEB_CONFIG_SYSTEM = /etc/gitweb.conf -GITWEB_HOME_LINK_STR = projects -GITWEB_SITENAME = -GITWEB_PROJECTROOT = /pub/git -GITWEB_PROJECT_MAXDEPTH = 2007 -GITWEB_EXPORT_OK = -GITWEB_STRICT_EXPORT = -GITWEB_BASE_URL = -GITWEB_LIST = -GITWEB_HOMETEXT = indextext.html -GITWEB_CSS = gitweb.css -GITWEB_LOGO = git-logo.png -GITWEB_FAVICON = git-favicon.png -GITWEB_SITE_HEADER = -GITWEB_SITE_FOOTER = - export prefix bindir sharedir sysconfdir CC = gcc @@ -277,89 +247,46 @@ SCRIPT_PERL = SCRIPT_SH = TEST_PROGRAMS = -SCRIPT_SH += git-am.sh -SCRIPT_SH += git-bisect.sh -SCRIPT_SH += git-difftool--helper.sh -SCRIPT_SH += git-filter-branch.sh -SCRIPT_SH += git-lost-found.sh -SCRIPT_SH += git-merge-octopus.sh -SCRIPT_SH += git-merge-one-file.sh -SCRIPT_SH += git-merge-resolve.sh -SCRIPT_SH += git-mergetool.sh -SCRIPT_SH += git-mergetool--lib.sh -SCRIPT_SH += git-parse-remote.sh -SCRIPT_SH += git-pull.sh -SCRIPT_SH += git-quiltimport.sh -SCRIPT_SH += git-rebase--interactive.sh -SCRIPT_SH += git-rebase.sh -SCRIPT_SH += git-repack.sh -SCRIPT_SH += git-request-pull.sh -SCRIPT_SH += git-sh-setup.sh -SCRIPT_SH += git-stash.sh -SCRIPT_SH += git-submodule.sh -SCRIPT_SH += git-web--browse.sh - -SCRIPT_PERL += git-add--interactive.perl -SCRIPT_PERL += git-difftool.perl -SCRIPT_PERL += git-archimport.perl -SCRIPT_PERL += git-cvsexportcommit.perl -SCRIPT_PERL += git-cvsimport.perl -SCRIPT_PERL += git-cvsserver.perl -SCRIPT_PERL += git-relink.perl -SCRIPT_PERL += git-send-email.perl -SCRIPT_PERL += git-svn.perl +# +# No scripts right now: +# + +# SCRIPT_SH += perf-am.sh + +# +# No Perl scripts right now: +# + +# SCRIPT_PERL += perf-add--interactive.perl SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ - $(patsubst %.perl,%,$(SCRIPT_PERL)) \ - git-instaweb + $(patsubst %.perl,%,$(SCRIPT_PERL)) # Empty... EXTRA_PROGRAMS = -# ... and all the rest that could be moved out of bindir to gitexecdir +# ... and all the rest that could be moved out of bindir to perfexecdir PROGRAMS += $(EXTRA_PROGRAMS) -PROGRAMS += git-fast-import$X -PROGRAMS += git-hash-object$X -PROGRAMS += git-index-pack$X -PROGRAMS += git-merge-index$X -PROGRAMS += git-merge-tree$X -PROGRAMS += git-mktag$X -PROGRAMS += git-mktree$X -PROGRAMS += git-pack-redundant$X -PROGRAMS += git-patch-id$X -PROGRAMS += git-shell$X -PROGRAMS += git-show-index$X -PROGRAMS += git-unpack-file$X -PROGRAMS += git-update-server-info$X -PROGRAMS += git-upload-pack$X -PROGRAMS += git-var$X + +# +# None right now: +# +# PROGRAMS += perf-fast-import$X # List built-in command $C whose implementation cmd_$C() is not in # builtin-$C.o but is linked in as part of some other command. -BUILT_INS += $(patsubst builtin-%.o,git-%$X,$(BUILTIN_OBJS)) - -BUILT_INS += git-cherry$X -BUILT_INS += git-cherry-pick$X -BUILT_INS += git-format-patch$X -BUILT_INS += git-fsck-objects$X -BUILT_INS += git-get-tar-commit-id$X -BUILT_INS += git-init$X -BUILT_INS += git-merge-subtree$X -BUILT_INS += git-peek-remote$X -BUILT_INS += git-repo-config$X -BUILT_INS += git-show$X -BUILT_INS += git-stage$X -BUILT_INS += git-status$X -BUILT_INS += git-whatchanged$X - -# what 'all' will build and 'install' will install, in gitexecdir +BUILT_INS += $(patsubst builtin-%.o,perf-%$X,$(BUILTIN_OBJS)) + +# +# None right now: +# +# BUILT_INS += perf-init $X + +# what 'all' will build and 'install' will install, in perfexecdir ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) -# what 'all' will build but not install in gitexecdir -OTHER_PROGRAMS = git$X -ifndef NO_PERL -OTHER_PROGRAMS += gitweb/gitweb.cgi -endif +# what 'all' will build but not install in perfexecdir +OTHER_PROGRAMS = perf$X # Set paths to tools early so that they can be used for version tests. ifndef SHELL_PATH @@ -371,250 +298,34 @@ endif export PERL_PATH -LIB_FILE=libgit.a -XDIFF_LIB=xdiff/lib.a - -LIB_H += archive.h -LIB_H += attr.h -LIB_H += blob.h -LIB_H += builtin.h -LIB_H += cache.h -LIB_H += cache-tree.h -LIB_H += commit.h -LIB_H += compat/cygwin.h -LIB_H += compat/mingw.h -LIB_H += csum-file.h -LIB_H += decorate.h -LIB_H += delta.h -LIB_H += diffcore.h -LIB_H += diff.h -LIB_H += dir.h -LIB_H += fsck.h -LIB_H += git-compat-util.h -LIB_H += graph.h -LIB_H += grep.h -LIB_H += hash.h -LIB_H += help.h +LIB_FILE=libperf.a + +LIB_H += ../../include/linux/perf_counter.h LIB_H += levenshtein.h -LIB_H += list-objects.h -LIB_H += ll-merge.h -LIB_H += log-tree.h -LIB_H += mailmap.h -LIB_H += merge-recursive.h -LIB_H += object.h -LIB_H += pack.h -LIB_H += pack-refs.h -LIB_H += pack-revindex.h LIB_H += parse-options.h -LIB_H += patch-ids.h -LIB_H += pkt-line.h -LIB_H += progress.h LIB_H += quote.h -LIB_H += reflog-walk.h -LIB_H += refs.h -LIB_H += remote.h -LIB_H += rerere.h -LIB_H += revision.h -LIB_H += run-command.h -LIB_H += sha1-lookup.h -LIB_H += sideband.h -LIB_H += sigchain.h LIB_H += strbuf.h -LIB_H += string-list.h -LIB_H += tag.h -LIB_H += transport.h -LIB_H += tree.h -LIB_H += tree-walk.h -LIB_H += unpack-trees.h -LIB_H += userdiff.h -LIB_H += utf8.h -LIB_H += wt-status.h +LIB_H += run-command.h LIB_OBJS += abspath.o LIB_OBJS += alias.o -LIB_OBJS += alloc.o -LIB_OBJS += archive.o -LIB_OBJS += archive-tar.o -LIB_OBJS += archive-zip.o -LIB_OBJS += attr.o -LIB_OBJS += base85.o -LIB_OBJS += bisect.o -LIB_OBJS += blob.o -LIB_OBJS += branch.o -LIB_OBJS += bundle.o -LIB_OBJS += cache-tree.o -LIB_OBJS += color.o -LIB_OBJS += combine-diff.o -LIB_OBJS += commit.o LIB_OBJS += config.o -LIB_OBJS += connect.o -LIB_OBJS += convert.o -LIB_OBJS += copy.o -LIB_OBJS += csum-file.o LIB_OBJS += ctype.o -LIB_OBJS += date.o -LIB_OBJS += decorate.o -LIB_OBJS += diffcore-break.o -LIB_OBJS += diffcore-delta.o -LIB_OBJS += diffcore-order.o -LIB_OBJS += diffcore-pickaxe.o -LIB_OBJS += diffcore-rename.o -LIB_OBJS += diff-delta.o -LIB_OBJS += diff-lib.o -LIB_OBJS += diff-no-index.o -LIB_OBJS += diff.o -LIB_OBJS += dir.o -LIB_OBJS += editor.o -LIB_OBJS += entry.o -LIB_OBJS += environment.o LIB_OBJS += exec_cmd.o -LIB_OBJS += fsck.o -LIB_OBJS += graph.o -LIB_OBJS += grep.o -LIB_OBJS += hash.o LIB_OBJS += help.o -LIB_OBJS += ident.o LIB_OBJS += levenshtein.o -LIB_OBJS += list-objects.o -LIB_OBJS += ll-merge.o -LIB_OBJS += lockfile.o -LIB_OBJS += log-tree.o -LIB_OBJS += mailmap.o -LIB_OBJS += match-trees.o -LIB_OBJS += merge-file.o -LIB_OBJS += merge-recursive.o -LIB_OBJS += name-hash.o -LIB_OBJS += object.o -LIB_OBJS += pack-check.o -LIB_OBJS += pack-refs.o -LIB_OBJS += pack-revindex.o -LIB_OBJS += pack-write.o -LIB_OBJS += pager.o LIB_OBJS += parse-options.o -LIB_OBJS += patch-delta.o -LIB_OBJS += patch-ids.o LIB_OBJS += path.o -LIB_OBJS += pkt-line.o -LIB_OBJS += preload-index.o -LIB_OBJS += pretty.o -LIB_OBJS += progress.o -LIB_OBJS += quote.o -LIB_OBJS += reachable.o -LIB_OBJS += read-cache.o -LIB_OBJS += reflog-walk.o -LIB_OBJS += refs.o -LIB_OBJS += remote.o -LIB_OBJS += rerere.o -LIB_OBJS += revision.o LIB_OBJS += run-command.o -LIB_OBJS += server-info.o -LIB_OBJS += setup.o -LIB_OBJS += sha1-lookup.o -LIB_OBJS += sha1_file.o -LIB_OBJS += sha1_name.o -LIB_OBJS += shallow.o -LIB_OBJS += sideband.o -LIB_OBJS += sigchain.o +LIB_OBJS += quote.o LIB_OBJS += strbuf.o -LIB_OBJS += string-list.o -LIB_OBJS += symlinks.o -LIB_OBJS += tag.o -LIB_OBJS += trace.o -LIB_OBJS += transport.o -LIB_OBJS += tree-diff.o -LIB_OBJS += tree.o -LIB_OBJS += tree-walk.o -LIB_OBJS += unpack-trees.o LIB_OBJS += usage.o -LIB_OBJS += userdiff.o -LIB_OBJS += utf8.o -LIB_OBJS += walker.o LIB_OBJS += wrapper.o -LIB_OBJS += write_or_die.o -LIB_OBJS += ws.o -LIB_OBJS += wt-status.o -LIB_OBJS += xdiff-interface.o - -BUILTIN_OBJS += builtin-add.o -BUILTIN_OBJS += builtin-annotate.o -BUILTIN_OBJS += builtin-apply.o -BUILTIN_OBJS += builtin-archive.o -BUILTIN_OBJS += builtin-bisect--helper.o -BUILTIN_OBJS += builtin-blame.o -BUILTIN_OBJS += builtin-branch.o -BUILTIN_OBJS += builtin-bundle.o -BUILTIN_OBJS += builtin-cat-file.o -BUILTIN_OBJS += builtin-check-attr.o -BUILTIN_OBJS += builtin-check-ref-format.o -BUILTIN_OBJS += builtin-checkout-index.o -BUILTIN_OBJS += builtin-checkout.o -BUILTIN_OBJS += builtin-clean.o -BUILTIN_OBJS += builtin-clone.o -BUILTIN_OBJS += builtin-commit-tree.o -BUILTIN_OBJS += builtin-commit.o -BUILTIN_OBJS += builtin-config.o -BUILTIN_OBJS += builtin-count-objects.o -BUILTIN_OBJS += builtin-describe.o -BUILTIN_OBJS += builtin-diff-files.o -BUILTIN_OBJS += builtin-diff-index.o -BUILTIN_OBJS += builtin-diff-tree.o -BUILTIN_OBJS += builtin-diff.o -BUILTIN_OBJS += builtin-fast-export.o -BUILTIN_OBJS += builtin-fetch--tool.o -BUILTIN_OBJS += builtin-fetch-pack.o -BUILTIN_OBJS += builtin-fetch.o -BUILTIN_OBJS += builtin-fmt-merge-msg.o -BUILTIN_OBJS += builtin-for-each-ref.o -BUILTIN_OBJS += builtin-fsck.o -BUILTIN_OBJS += builtin-gc.o -BUILTIN_OBJS += builtin-grep.o + BUILTIN_OBJS += builtin-help.o -BUILTIN_OBJS += builtin-init-db.o -BUILTIN_OBJS += builtin-log.o -BUILTIN_OBJS += builtin-ls-files.o -BUILTIN_OBJS += builtin-ls-remote.o -BUILTIN_OBJS += builtin-ls-tree.o -BUILTIN_OBJS += builtin-mailinfo.o -BUILTIN_OBJS += builtin-mailsplit.o -BUILTIN_OBJS += builtin-merge.o -BUILTIN_OBJS += builtin-merge-base.o -BUILTIN_OBJS += builtin-merge-file.o -BUILTIN_OBJS += builtin-merge-ours.o -BUILTIN_OBJS += builtin-merge-recursive.o -BUILTIN_OBJS += builtin-mv.o -BUILTIN_OBJS += builtin-name-rev.o -BUILTIN_OBJS += builtin-pack-objects.o -BUILTIN_OBJS += builtin-pack-refs.o -BUILTIN_OBJS += builtin-prune-packed.o -BUILTIN_OBJS += builtin-prune.o -BUILTIN_OBJS += builtin-push.o -BUILTIN_OBJS += builtin-read-tree.o -BUILTIN_OBJS += builtin-receive-pack.o -BUILTIN_OBJS += builtin-reflog.o -BUILTIN_OBJS += builtin-remote.o -BUILTIN_OBJS += builtin-rerere.o -BUILTIN_OBJS += builtin-reset.o -BUILTIN_OBJS += builtin-rev-list.o -BUILTIN_OBJS += builtin-rev-parse.o -BUILTIN_OBJS += builtin-revert.o -BUILTIN_OBJS += builtin-rm.o -BUILTIN_OBJS += builtin-send-pack.o -BUILTIN_OBJS += builtin-shortlog.o -BUILTIN_OBJS += builtin-show-branch.o -BUILTIN_OBJS += builtin-show-ref.o -BUILTIN_OBJS += builtin-stripspace.o -BUILTIN_OBJS += builtin-symbolic-ref.o -BUILTIN_OBJS += builtin-tag.o -BUILTIN_OBJS += builtin-tar-tree.o -BUILTIN_OBJS += builtin-unpack-objects.o -BUILTIN_OBJS += builtin-update-index.o -BUILTIN_OBJS += builtin-update-ref.o -BUILTIN_OBJS += builtin-upload-archive.o -BUILTIN_OBJS += builtin-verify-pack.o -BUILTIN_OBJS += builtin-verify-tag.o -BUILTIN_OBJS += builtin-write-tree.o - -GITLIBS = $(LIB_FILE) $(XDIFF_LIB) +BUILTIN_OBJS += builtin-top.o + +PERFLIBS = $(LIB_FILE) EXTLIBS = # @@ -625,221 +336,6 @@ EXTLIBS = # because maintaining the nesting to match is a pain. If # we had "elif" things would have been much nicer... -ifeq ($(uname_S),Linux) - NO_STRLCPY = YesPlease - THREADED_DELTA_SEARCH = YesPlease -endif -ifeq ($(uname_S),GNU/kFreeBSD) - NO_STRLCPY = YesPlease - THREADED_DELTA_SEARCH = YesPlease -endif -ifeq ($(uname_S),UnixWare) - CC = cc - NEEDS_SOCKET = YesPlease - NEEDS_NSL = YesPlease - NEEDS_SSL_WITH_CRYPTO = YesPlease - NEEDS_LIBICONV = YesPlease - SHELL_PATH = /usr/local/bin/bash - NO_IPV6 = YesPlease - NO_HSTRERROR = YesPlease - BASIC_CFLAGS += -Kthread - BASIC_CFLAGS += -I/usr/local/include - BASIC_LDFLAGS += -L/usr/local/lib - INSTALL = ginstall - TAR = gtar - NO_STRCASESTR = YesPlease - NO_MEMMEM = YesPlease -endif -ifeq ($(uname_S),SCO_SV) - ifeq ($(uname_R),3.2) - CFLAGS = -O2 - endif - ifeq ($(uname_R),5) - CC = cc - BASIC_CFLAGS += -Kthread - endif - NEEDS_SOCKET = YesPlease - NEEDS_NSL = YesPlease - NEEDS_SSL_WITH_CRYPTO = YesPlease - NEEDS_LIBICONV = YesPlease - SHELL_PATH = /usr/bin/bash - NO_IPV6 = YesPlease - NO_HSTRERROR = YesPlease - BASIC_CFLAGS += -I/usr/local/include - BASIC_LDFLAGS += -L/usr/local/lib - NO_STRCASESTR = YesPlease - NO_MEMMEM = YesPlease - INSTALL = ginstall - TAR = gtar -endif -ifeq ($(uname_S),Darwin) - NEEDS_SSL_WITH_CRYPTO = YesPlease - NEEDS_LIBICONV = YesPlease - ifeq ($(shell expr "$(uname_R)" : '[15678]\.'),2) - OLD_ICONV = UnfortunatelyYes - endif - ifeq ($(shell expr "$(uname_R)" : '[15]\.'),2) - NO_STRLCPY = YesPlease - endif - NO_MEMMEM = YesPlease - THREADED_DELTA_SEARCH = YesPlease - USE_ST_TIMESPEC = YesPlease -endif -ifeq ($(uname_S),SunOS) - NEEDS_SOCKET = YesPlease - NEEDS_NSL = YesPlease - SHELL_PATH = /bin/bash - NO_STRCASESTR = YesPlease - NO_MEMMEM = YesPlease - NO_HSTRERROR = YesPlease - NO_MKDTEMP = YesPlease - OLD_ICONV = UnfortunatelyYes - ifeq ($(uname_R),5.8) - NO_UNSETENV = YesPlease - NO_SETENV = YesPlease - NO_C99_FORMAT = YesPlease - NO_STRTOUMAX = YesPlease - endif - ifeq ($(uname_R),5.9) - NO_UNSETENV = YesPlease - NO_SETENV = YesPlease - NO_C99_FORMAT = YesPlease - NO_STRTOUMAX = YesPlease - endif - INSTALL = ginstall - TAR = gtar - BASIC_CFLAGS += -D__EXTENSIONS__ -endif -ifeq ($(uname_O),Cygwin) - NO_D_TYPE_IN_DIRENT = YesPlease - NO_D_INO_IN_DIRENT = YesPlease - NO_STRCASESTR = YesPlease - NO_MEMMEM = YesPlease - NO_SYMLINK_HEAD = YesPlease - NEEDS_LIBICONV = YesPlease - NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes - NO_TRUSTABLE_FILEMODE = UnfortunatelyYes - OLD_ICONV = UnfortunatelyYes - # There are conflicting reports about this. - # On some boxes NO_MMAP is needed, and not so elsewhere. - # Try commenting this out if you suspect MMAP is more efficient - NO_MMAP = YesPlease - NO_IPV6 = YesPlease - X = .exe -endif -ifeq ($(uname_S),FreeBSD) - NEEDS_LIBICONV = YesPlease - NO_MEMMEM = YesPlease - BASIC_CFLAGS += -I/usr/local/include - BASIC_LDFLAGS += -L/usr/local/lib - DIR_HAS_BSD_GROUP_SEMANTICS = YesPlease - USE_ST_TIMESPEC = YesPlease - THREADED_DELTA_SEARCH = YesPlease - ifeq ($(shell expr "$(uname_R)" : '4\.'),2) - PTHREAD_LIBS = -pthread - NO_UINTMAX_T = YesPlease - NO_STRTOUMAX = YesPlease - endif -endif -ifeq ($(uname_S),OpenBSD) - NO_STRCASESTR = YesPlease - NO_MEMMEM = YesPlease - NEEDS_LIBICONV = YesPlease - BASIC_CFLAGS += -I/usr/local/include - BASIC_LDFLAGS += -L/usr/local/lib - THREADED_DELTA_SEARCH = YesPlease -endif -ifeq ($(uname_S),NetBSD) - ifeq ($(shell expr "$(uname_R)" : '[01]\.'),2) - NEEDS_LIBICONV = YesPlease - endif - BASIC_CFLAGS += -I/usr/pkg/include - BASIC_LDFLAGS += -L/usr/pkg/lib $(CC_LD_DYNPATH)/usr/pkg/lib - THREADED_DELTA_SEARCH = YesPlease -endif -ifeq ($(uname_S),AIX) - NO_STRCASESTR=YesPlease - NO_MEMMEM = YesPlease - NO_MKDTEMP = YesPlease - NO_STRLCPY = YesPlease - NO_NSEC = YesPlease - FREAD_READS_DIRECTORIES = UnfortunatelyYes - INTERNAL_QSORT = UnfortunatelyYes - NEEDS_LIBICONV=YesPlease - BASIC_CFLAGS += -D_LARGE_FILES - ifneq ($(shell expr "$(uname_V)" : '[1234]'),1) - THREADED_DELTA_SEARCH = YesPlease - else - NO_PTHREADS = YesPlease - endif -endif -ifeq ($(uname_S),GNU) - # GNU/Hurd - NO_STRLCPY=YesPlease -endif -ifeq ($(uname_S),IRIX64) - NO_IPV6=YesPlease - NO_SETENV=YesPlease - NO_STRCASESTR=YesPlease - NO_MEMMEM = YesPlease - NO_STRLCPY = YesPlease - NO_SOCKADDR_STORAGE=YesPlease - SHELL_PATH=/usr/gnu/bin/bash - BASIC_CFLAGS += -DPATH_MAX=1024 - # for now, build 32-bit version - BASIC_LDFLAGS += -L/usr/lib32 -endif -ifeq ($(uname_S),HP-UX) - NO_IPV6=YesPlease - NO_SETENV=YesPlease - NO_STRCASESTR=YesPlease - NO_MEMMEM = YesPlease - NO_STRLCPY = YesPlease - NO_MKDTEMP = YesPlease - NO_UNSETENV = YesPlease - NO_HSTRERROR = YesPlease - NO_SYS_SELECT_H = YesPlease - SNPRINTF_RETURNS_BOGUS = YesPlease -endif -ifneq (,$(findstring CYGWIN,$(uname_S))) - COMPAT_OBJS += compat/cygwin.o -endif -ifneq (,$(findstring MINGW,$(uname_S))) - NO_PREAD = YesPlease - NO_OPENSSL = YesPlease - NO_CURL = YesPlease - NO_SYMLINK_HEAD = YesPlease - NO_IPV6 = YesPlease - NO_SETENV = YesPlease - NO_UNSETENV = YesPlease - NO_STRCASESTR = YesPlease - NO_STRLCPY = YesPlease - NO_MEMMEM = YesPlease - NO_PTHREADS = YesPlease - NEEDS_LIBICONV = YesPlease - OLD_ICONV = YesPlease - NO_C99_FORMAT = YesPlease - NO_STRTOUMAX = YesPlease - NO_MKDTEMP = YesPlease - SNPRINTF_RETURNS_BOGUS = YesPlease - NO_SVN_TESTS = YesPlease - NO_PERL_MAKEMAKER = YesPlease - RUNTIME_PREFIX = YesPlease - NO_POSIX_ONLY_PROGRAMS = YesPlease - NO_ST_BLOCKS_IN_STRUCT_STAT = YesPlease - NO_NSEC = YesPlease - USE_WIN32_MMAP = YesPlease - COMPAT_CFLAGS += -D__USE_MINGW_ACCESS -DNOGDI -Icompat -Icompat/regex -Icompat/fnmatch - COMPAT_CFLAGS += -DSNPRINTF_SIZE_CORR=1 - COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\" - COMPAT_OBJS += compat/mingw.o compat/fnmatch/fnmatch.o compat/regex/regex.o compat/winansi.o - EXTLIBS += -lws2_32 - X = .exe -endif -ifneq (,$(findstring arm,$(uname_M))) - ARM_SHA1 = YesPlease -endif - -include config.mak.autogen -include config.mak @@ -869,72 +365,12 @@ ifndef CC_LD_DYNPATH endif endif -ifdef NO_CURL - BASIC_CFLAGS += -DNO_CURL -else - ifdef CURLDIR - # Try "-Wl,-rpath=$(CURLDIR)/$(lib)" in such a case. - BASIC_CFLAGS += -I$(CURLDIR)/include - CURL_LIBCURL = -L$(CURLDIR)/$(lib) $(CC_LD_DYNPATH)$(CURLDIR)/$(lib) -lcurl - else - CURL_LIBCURL = -lcurl - endif - BUILTIN_OBJS += builtin-http-fetch.o - EXTLIBS += $(CURL_LIBCURL) - LIB_OBJS += http.o http-walker.o - curl_check := $(shell (echo 070908; curl-config --vernum) | sort -r | sed -ne 2p) - ifeq "$(curl_check)" "070908" - ifndef NO_EXPAT - PROGRAMS += git-http-push$X - endif - endif - ifndef NO_EXPAT - ifdef EXPATDIR - BASIC_CFLAGS += -I$(EXPATDIR)/include - EXPAT_LIBEXPAT = -L$(EXPATDIR)/$(lib) $(CC_LD_DYNPATH)$(EXPATDIR)/$(lib) -lexpat - else - EXPAT_LIBEXPAT = -lexpat - endif - endif -endif - ifdef ZLIB_PATH BASIC_CFLAGS += -I$(ZLIB_PATH)/include EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) endif EXTLIBS += -lz -ifndef NO_POSIX_ONLY_PROGRAMS - PROGRAMS += git-daemon$X - PROGRAMS += git-imap-send$X -endif -ifndef NO_OPENSSL - OPENSSL_LIBSSL = -lssl - ifdef OPENSSLDIR - BASIC_CFLAGS += -I$(OPENSSLDIR)/include - OPENSSL_LINK = -L$(OPENSSLDIR)/$(lib) $(CC_LD_DYNPATH)$(OPENSSLDIR)/$(lib) - else - OPENSSL_LINK = - endif -else - BASIC_CFLAGS += -DNO_OPENSSL - MOZILLA_SHA1 = 1 - OPENSSL_LIBSSL = -endif -ifdef NEEDS_SSL_WITH_CRYPTO - LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto -lssl -else - LIB_4_CRYPTO = $(OPENSSL_LINK) -lcrypto -endif -ifdef NEEDS_LIBICONV - ifdef ICONVDIR - BASIC_CFLAGS += -I$(ICONVDIR)/include - ICONV_LINK = -L$(ICONVDIR)/$(lib) $(CC_LD_DYNPATH)$(ICONVDIR)/$(lib) - else - ICONV_LINK = - endif - EXTLIBS += $(ICONV_LINK) -liconv -endif ifdef NEEDS_SOCKET EXTLIBS += -lsocket endif @@ -977,10 +413,6 @@ ifdef NO_STRCASESTR COMPAT_CFLAGS += -DNO_STRCASESTR COMPAT_OBJS += compat/strcasestr.o endif -ifdef NO_STRLCPY - COMPAT_CFLAGS += -DNO_STRLCPY - COMPAT_OBJS += compat/strlcpy.o -endif ifdef NO_STRTOUMAX COMPAT_CFLAGS += -DNO_STRTOUMAX COMPAT_OBJS += compat/strtoumax.o @@ -1090,17 +522,6 @@ ifdef RUNTIME_PREFIX COMPAT_CFLAGS += -DRUNTIME_PREFIX endif -ifdef NO_PTHREADS - THREADED_DELTA_SEARCH = - BASIC_CFLAGS += -DNO_PTHREADS -else - EXTLIBS += $(PTHREAD_LIBS) -endif - -ifdef THREADED_DELTA_SEARCH - BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH - LIB_OBJS += thread-utils.o -endif ifdef DIR_HAS_BSD_GROUP_SEMANTICS COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS endif @@ -1148,14 +569,14 @@ endif # Shell quote (do not use $(call) to accommodate ancient setups); SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER)) -ETC_GITCONFIG_SQ = $(subst ','\'',$(ETC_GITCONFIG)) +ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) bindir_SQ = $(subst ','\'',$(bindir)) bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) mandir_SQ = $(subst ','\'',$(mandir)) infodir_SQ = $(subst ','\'',$(infodir)) -gitexecdir_SQ = $(subst ','\'',$(gitexecdir)) +perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) prefix_SQ = $(subst ','\'',$(prefix)) @@ -1164,7 +585,7 @@ SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) TCLTK_PATH_SQ = $(subst ','\'',$(TCLTK_PATH)) -LIBS = $(GITLIBS) $(EXTLIBS) +LIBS = $(PERFLIBS) $(EXTLIBS) BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ $(COMPAT_CFLAGS) @@ -1180,15 +601,15 @@ export TAR INSTALL DESTDIR SHELL_PATH SHELL = $(SHELL_PATH) -all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) GIT-BUILD-OPTIONS +all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), test '$p' -ef '$p$X' || $(RM) '$p';) + $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) endif all:: ifndef NO_TCLTK - $(QUIET_SUBDIR0)git-gui $(QUIET_SUBDIR1) gitexecdir='$(gitexec_instdir_SQ)' all - $(QUIET_SUBDIR0)gitk-git $(QUIET_SUBDIR1) all + $(QUIET_SUBDIR0)perf-gui $(QUIET_SUBDIR1) perfexecdir='$(perfexec_instdir_SQ)' all + $(QUIET_SUBDIR0)perfk-perf $(QUIET_SUBDIR1) all endif ifndef NO_PERL $(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' all @@ -1200,33 +621,33 @@ please_set_SHELL_PATH_to_a_more_modern_shell: shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell -strip: $(PROGRAMS) git$X - $(STRIP) $(STRIP_OPTS) $(PROGRAMS) git$X +strip: $(PROGRAMS) perf$X + $(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X -git.o: git.c common-cmds.h GIT-CFLAGS - $(QUIET_CC)$(CC) -DGIT_VERSION='"$(GIT_VERSION)"' \ - '-DGIT_HTML_PATH="$(htmldir_SQ)"' \ +perf.o: perf.c common-cmds.h PERF-CFLAGS + $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ + '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ $(ALL_CFLAGS) -c $(filter %.c,$^) -git$X: git.o $(BUILTIN_OBJS) $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ git.o \ +perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \ $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) -builtin-help.o: builtin-help.c common-cmds.h GIT-CFLAGS +builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ - '-DGIT_HTML_PATH="$(htmldir_SQ)"' \ - '-DGIT_MAN_PATH="$(mandir_SQ)"' \ - '-DGIT_INFO_PATH="$(infodir_SQ)"' $< + '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ + '-DPERF_MAN_PATH="$(mandir_SQ)"' \ + '-DPERF_INFO_PATH="$(infodir_SQ)"' $< -$(BUILT_INS): git$X +$(BUILT_INS): perf$X $(QUIET_BUILT_IN)$(RM) $@ && \ - ln git$X $@ 2>/dev/null || \ - ln -s git$X $@ 2>/dev/null || \ - cp git$X $@ + ln perf$X $@ 2>/dev/null || \ + ln -s perf$X $@ 2>/dev/null || \ + cp perf$X $@ common-cmds.h: ./generate-cmdlist.sh command-list.txt -common-cmds.h: $(wildcard Documentation/git-*.txt) +common-cmds.h: $(wildcard Documentation/perf-*.txt) $(QUIET_GEN)./generate-cmdlist.sh > $@+ && mv $@+ $@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh @@ -1234,152 +655,55 @@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ - -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ + -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ $@.sh >$@+ && \ chmod +x $@+ && \ mv $@+ $@ -ifndef NO_PERL -$(patsubst %.perl,%,$(SCRIPT_PERL)): perl/perl.mak - -perl/perl.mak: GIT-CFLAGS perl/Makefile perl/Makefile.PL - $(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' $(@F) - -$(patsubst %.perl,%,$(SCRIPT_PERL)): % : %.perl - $(QUIET_GEN)$(RM) $@ $@+ && \ - INSTLIBDIR=`MAKEFLAGS= $(MAKE) -C perl -s --no-print-directory instlibdir` && \ - sed -e '1{' \ - -e ' s|#!.*perl|#!$(PERL_PATH_SQ)|' \ - -e ' h' \ - -e ' s=.*=use lib (split(/:/, $$ENV{GITPERLLIB} || "@@INSTLIBDIR@@"));=' \ - -e ' H' \ - -e ' x' \ - -e '}' \ - -e 's|@@INSTLIBDIR@@|'"$$INSTLIBDIR"'|g' \ - -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ - $@.perl >$@+ && \ - chmod +x $@+ && \ - mv $@+ $@ - -gitweb/gitweb.cgi: gitweb/gitweb.perl - $(QUIET_GEN)$(RM) $@ $@+ && \ - sed -e '1s|#!.*perl|#!$(PERL_PATH_SQ)|' \ - -e 's|++GIT_VERSION++|$(GIT_VERSION)|g' \ - -e 's|++GIT_BINDIR++|$(bindir)|g' \ - -e 's|++GITWEB_CONFIG++|$(GITWEB_CONFIG)|g' \ - -e 's|++GITWEB_CONFIG_SYSTEM++|$(GITWEB_CONFIG_SYSTEM)|g' \ - -e 's|++GITWEB_HOME_LINK_STR++|$(GITWEB_HOME_LINK_STR)|g' \ - -e 's|++GITWEB_SITENAME++|$(GITWEB_SITENAME)|g' \ - -e 's|++GITWEB_PROJECTROOT++|$(GITWEB_PROJECTROOT)|g' \ - -e 's|"++GITWEB_PROJECT_MAXDEPTH++"|$(GITWEB_PROJECT_MAXDEPTH)|g' \ - -e 's|++GITWEB_EXPORT_OK++|$(GITWEB_EXPORT_OK)|g' \ - -e 's|++GITWEB_STRICT_EXPORT++|$(GITWEB_STRICT_EXPORT)|g' \ - -e 's|++GITWEB_BASE_URL++|$(GITWEB_BASE_URL)|g' \ - -e 's|++GITWEB_LIST++|$(GITWEB_LIST)|g' \ - -e 's|++GITWEB_HOMETEXT++|$(GITWEB_HOMETEXT)|g' \ - -e 's|++GITWEB_CSS++|$(GITWEB_CSS)|g' \ - -e 's|++GITWEB_LOGO++|$(GITWEB_LOGO)|g' \ - -e 's|++GITWEB_FAVICON++|$(GITWEB_FAVICON)|g' \ - -e 's|++GITWEB_SITE_HEADER++|$(GITWEB_SITE_HEADER)|g' \ - -e 's|++GITWEB_SITE_FOOTER++|$(GITWEB_SITE_FOOTER)|g' \ - $< >$@+ && \ - chmod +x $@+ && \ - mv $@+ $@ - -git-instaweb: git-instaweb.sh gitweb/gitweb.cgi gitweb/gitweb.css - $(QUIET_GEN)$(RM) $@ $@+ && \ - sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ - -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ - -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ - -e '/@@GITWEB_CGI@@/r gitweb/gitweb.cgi' \ - -e '/@@GITWEB_CGI@@/d' \ - -e '/@@GITWEB_CSS@@/r gitweb/gitweb.css' \ - -e '/@@GITWEB_CSS@@/d' \ - -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ - $@.sh > $@+ && \ - chmod +x $@+ && \ - mv $@+ $@ -else # NO_PERL -$(patsubst %.perl,%,$(SCRIPT_PERL)) git-instaweb: % : unimplemented.sh - $(QUIET_GEN)$(RM) $@ $@+ && \ - sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ - -e 's|@@REASON@@|NO_PERL=$(NO_PERL)|g' \ - unimplemented.sh >$@+ && \ - chmod +x $@+ && \ - mv $@+ $@ -endif # NO_PERL - configure: configure.ac $(QUIET_GEN)$(RM) $@ $<+ && \ - sed -e 's/@@GIT_VERSION@@/$(GIT_VERSION)/g' \ + sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ $< > $<+ && \ autoconf -o $@ $<+ && \ $(RM) $<+ -# These can record GIT_VERSION -git.o git.spec \ +# These can record PERF_VERSION +perf.o perf.spec \ $(patsubst %.sh,%,$(SCRIPT_SH)) \ $(patsubst %.perl,%,$(SCRIPT_PERL)) \ - : GIT-VERSION-FILE + : PERF-VERSION-FILE -%.o: %.c GIT-CFLAGS +%.o: %.c PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< -%.s: %.c GIT-CFLAGS +%.s: %.c PERF-CFLAGS $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< %.o: %.S $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< -exec_cmd.o: exec_cmd.c GIT-CFLAGS +exec_cmd.o: exec_cmd.c PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ - '-DGIT_EXEC_PATH="$(gitexecdir_SQ)"' \ + '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ '-DBINDIR="$(bindir_relative_SQ)"' \ '-DPREFIX="$(prefix_SQ)"' \ $< -builtin-init-db.o: builtin-init-db.c GIT-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_GIT_TEMPLATE_DIR='"$(template_dir_SQ)"' $< - -config.o: config.c GIT-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_GITCONFIG='"$(ETC_GITCONFIG_SQ)"' $< +builtin-init-db.o: builtin-init-db.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $< -http.o: http.c GIT-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DGIT_USER_AGENT='"git/$(GIT_VERSION)"' $< +config.o: config.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< -ifdef NO_EXPAT -http-walker.o: http-walker.c http.h GIT-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DNO_EXPAT $< -endif - -git-%$X: %.o $(GITLIBS) +perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) -git-imap-send$X: imap-send.o $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ - $(LIBS) $(OPENSSL_LINK) $(OPENSSL_LIBSSL) - -http.o http-walker.o http-push.o transport.o: http.h - -git-http-push$X: revision.o http.o http-push.o $(GITLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ - $(LIBS) $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) - $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst git-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) builtin-revert.o wt-status.o: wt-status.h $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) -XDIFF_OBJS=xdiff/xdiffi.o xdiff/xprepare.o xdiff/xutils.o xdiff/xemit.o \ - xdiff/xmerge.o xdiff/xpatience.o -$(XDIFF_OBJS): xdiff/xinclude.h xdiff/xmacros.h xdiff/xdiff.h xdiff/xtypes.h \ - xdiff/xutils.h xdiff/xprepare.h xdiff/xdiffi.h xdiff/xemit.h - -$(XDIFF_LIB): $(XDIFF_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(XDIFF_OBJS) - - doc: $(MAKE) -C Documentation all @@ -1409,19 +733,19 @@ cscope: ### Detect prefix changes TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\ - $(bindir_SQ):$(gitexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) + $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) -GIT-CFLAGS: .FORCE-GIT-CFLAGS +PERF-CFLAGS: .FORCE-PERF-CFLAGS @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat GIT-CFLAGS 2>/dev/null`" ; then \ + if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \ echo 1>&2 " * new build flags or prefix"; \ - echo "$$FLAGS" >GIT-CFLAGS; \ + echo "$$FLAGS" >PERF-CFLAGS; \ fi # We need to apply sq twice, once to protect from the shell -# that runs GIT-BUILD-OPTIONS, and then again to protect it +# that runs PERF-BUILD-OPTIONS, and then again to protect it # and the first level quoting from the shell that runs "echo". -GIT-BUILD-OPTIONS: .FORCE-GIT-BUILD-OPTIONS +PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ @@ -1431,14 +755,14 @@ GIT-BUILD-OPTIONS: .FORCE-GIT-BUILD-OPTIONS ifndef NO_TCLTK TRACK_VARS = $(subst ','\'',-DTCLTK_PATH='$(TCLTK_PATH_SQ)') -GIT-GUI-VARS: .FORCE-GIT-GUI-VARS +PERF-GUI-VARS: .FORCE-PERF-GUI-VARS @VARS='$(TRACK_VARS)'; \ if test x"$$VARS" != x"`cat $@ 2>/dev/null`" ; then \ echo 1>&2 " * new Tcl/Tk interpreter location"; \ echo "$$VARS" >$@; \ fi -.PHONY: .FORCE-GIT-GUI-VARS +.PHONY: .FORCE-PERF-GUI-VARS endif ### Testing rules @@ -1476,7 +800,7 @@ test-parse-options$X: parse-options.o .PRECIOUS: $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS)) -test-%$X: test-%.o $(GITLIBS) +test-%$X: test-%.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) check-sha1:: test-sha1$X @@ -1506,40 +830,40 @@ template_instdir = $(prefix)/$(template_dir) endif export template_instdir -ifneq ($(filter /%,$(firstword $(gitexecdir))),) -gitexec_instdir = $(gitexecdir) +ifneq ($(filter /%,$(firstword $(perfexecdir))),) +perfexec_instdir = $(perfexecdir) else -gitexec_instdir = $(prefix)/$(gitexecdir) +perfexec_instdir = $(prefix)/$(perfexecdir) endif -gitexec_instdir_SQ = $(subst ','\'',$(gitexec_instdir)) -export gitexec_instdir +perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) +export perfexec_instdir install: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' - $(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' - $(INSTALL) git$X git-upload-pack$X git-receive-pack$X git-upload-archive$X git-shell$X git-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(INSTALL) perf$X perf-upload-pack$X perf-receive-pack$X perf-upload-archive$X perf-shell$X perf-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)' $(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install $(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install ifndef NO_TCLTK - $(MAKE) -C gitk-git install - $(MAKE) -C git-gui gitexecdir='$(gitexec_instdir_SQ)' install + $(MAKE) -C perfk-perf install + $(MAKE) -C perf-gui perfexecdir='$(perfexec_instdir_SQ)' install endif ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) git$X)), $(RM) '$(DESTDIR_SQ)$(gitexec_instdir_SQ)/$p';) + $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) endif bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \ - execdir=$$(cd '$(DESTDIR_SQ)$(gitexec_instdir_SQ)' && pwd) && \ - { $(RM) "$$execdir/git-add$X" && \ - ln "$$bindir/git$X" "$$execdir/git-add$X" 2>/dev/null || \ - cp "$$bindir/git$X" "$$execdir/git-add$X"; } && \ - { for p in $(filter-out git-add$X,$(BUILT_INS)); do \ + execdir=$$(cd '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' && pwd) && \ + { $(RM) "$$execdir/perf-add$X" && \ + ln "$$bindir/perf$X" "$$execdir/perf-add$X" 2>/dev/null || \ + cp "$$bindir/perf$X" "$$execdir/perf-add$X"; } && \ + { for p in $(filter-out perf-add$X,$(BUILT_INS)); do \ $(RM) "$$execdir/$$p" && \ - ln "$$execdir/git-add$X" "$$execdir/$$p" 2>/dev/null || \ - ln -s "git-add$X" "$$execdir/$$p" 2>/dev/null || \ - cp "$$execdir/git-add$X" "$$execdir/$$p" || exit; \ + ln "$$execdir/perf-add$X" "$$execdir/$$p" 2>/dev/null || \ + ln -s "perf-add$X" "$$execdir/$$p" 2>/dev/null || \ + cp "$$execdir/perf-add$X" "$$execdir/$$p" || exit; \ done } && \ - ./check_bindir "z$$bindir" "z$$execdir" "$$bindir/git-add$X" + ./check_bindir "z$$bindir" "z$$execdir" "$$bindir/perf-add$X" install-doc: $(MAKE) -C Documentation install @@ -1569,31 +893,31 @@ quick-install-html: ### Maintainer's dist rules -git.spec: git.spec.in - sed -e 's/@@VERSION@@/$(GIT_VERSION)/g' < $< > $@+ +perf.spec: perf.spec.in + sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+ mv $@+ $@ -GIT_TARNAME=git-$(GIT_VERSION) -dist: git.spec git-archive$(X) configure - ./git-archive --format=tar \ - --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar - @mkdir -p $(GIT_TARNAME) - @cp git.spec configure $(GIT_TARNAME) - @echo $(GIT_VERSION) > $(GIT_TARNAME)/version - @$(MAKE) -C git-gui TARDIR=../$(GIT_TARNAME)/git-gui dist-version - $(TAR) rf $(GIT_TARNAME).tar \ - $(GIT_TARNAME)/git.spec \ - $(GIT_TARNAME)/configure \ - $(GIT_TARNAME)/version \ - $(GIT_TARNAME)/git-gui/version - @$(RM) -r $(GIT_TARNAME) - gzip -f -9 $(GIT_TARNAME).tar +PERF_TARNAME=perf-$(PERF_VERSION) +dist: perf.spec perf-archive$(X) configure + ./perf-archive --format=tar \ + --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar + @mkdir -p $(PERF_TARNAME) + @cp perf.spec configure $(PERF_TARNAME) + @echo $(PERF_VERSION) > $(PERF_TARNAME)/version + @$(MAKE) -C perf-gui TARDIR=../$(PERF_TARNAME)/perf-gui dist-version + $(TAR) rf $(PERF_TARNAME).tar \ + $(PERF_TARNAME)/perf.spec \ + $(PERF_TARNAME)/configure \ + $(PERF_TARNAME)/version \ + $(PERF_TARNAME)/perf-gui/version + @$(RM) -r $(PERF_TARNAME) + gzip -f -9 $(PERF_TARNAME).tar rpm: dist - $(RPMBUILD) -ta $(GIT_TARNAME).tar.gz + $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz -htmldocs = git-htmldocs-$(GIT_VERSION) -manpages = git-manpages-$(GIT_VERSION) +htmldocs = perf-htmldocs-$(PERF_VERSION) +manpages = perf-manpages-$(PERF_VERSION) dist-doc: $(RM) -r .doc-tmp-dir mkdir .doc-tmp-dir @@ -1618,51 +942,46 @@ distclean: clean $(RM) configure clean: - $(RM) *.o mozilla-sha1/*.o arm/*.o ppc/*.o compat/*.o xdiff/*.o \ - $(LIB_FILE) $(XDIFF_LIB) - $(RM) $(ALL_PROGRAMS) $(BUILT_INS) git$X + $(RM) *.o $(LIB_FILE) + $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X $(RM) $(TEST_PROGRAMS) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* $(RM) -r autom4te.cache $(RM) config.log config.mak.autogen config.mak.append config.status config.cache - $(RM) -r $(GIT_TARNAME) .doc-tmp-dir - $(RM) $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz + $(RM) -r $(PERF_TARNAME) .doc-tmp-dir + $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz $(RM) $(htmldocs).tar.gz $(manpages).tar.gz $(MAKE) -C Documentation/ clean -ifndef NO_PERL - $(RM) gitweb/gitweb.cgi - $(MAKE) -C perl clean -endif $(MAKE) -C templates/ clean $(MAKE) -C t/ clean ifndef NO_TCLTK - $(MAKE) -C gitk-git clean - $(MAKE) -C git-gui clean + $(MAKE) -C perfk-perf clean + $(MAKE) -C perf-gui clean endif - $(RM) GIT-VERSION-FILE GIT-CFLAGS GIT-GUI-VARS GIT-BUILD-OPTIONS + $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-GUI-VARS PERF-BUILD-OPTIONS .PHONY: all install clean strip .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: .FORCE-GIT-VERSION-FILE TAGS tags cscope .FORCE-GIT-CFLAGS -.PHONY: .FORCE-GIT-BUILD-OPTIONS +.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS +.PHONY: .FORCE-PERF-BUILD-OPTIONS ### Check documentation # check-docs:: - @(for v in $(ALL_PROGRAMS) $(BUILT_INS) git gitk; \ + @(for v in $(ALL_PROGRAMS) $(BUILT_INS) perf perfk; \ do \ case "$$v" in \ - git-merge-octopus | git-merge-ours | git-merge-recursive | \ - git-merge-resolve | git-merge-subtree | \ - git-fsck-objects | git-init-db | \ - git-?*--?* ) continue ;; \ + perf-merge-octopus | perf-merge-ours | perf-merge-recursive | \ + perf-merge-resolve | perf-merge-subtree | \ + perf-fsck-objects | perf-init-db | \ + perf-?*--?* ) continue ;; \ esac ; \ test -f "Documentation/$$v.txt" || \ echo "no doc: $$v"; \ sed -e '/^#/d' command-list.txt | \ grep -q "^$$v[ ]" || \ case "$$v" in \ - git) ;; \ + perf) ;; \ *) echo "no link: $$v";; \ esac ; \ done; \ @@ -1670,37 +989,37 @@ check-docs:: sed -e '/^#/d' \ -e 's/[ ].*//' \ -e 's/^/listed /' command-list.txt; \ - ls -1 Documentation/git*txt | \ + ls -1 Documentation/perf*txt | \ sed -e 's|Documentation/|documented |' \ -e 's/\.txt//'; \ ) | while read how cmd; \ do \ case "$$how,$$cmd" in \ - *,git-citool | \ - *,git-gui | \ - *,git-help | \ - documented,gitattributes | \ - documented,gitignore | \ - documented,gitmodules | \ - documented,gitcli | \ - documented,git-tools | \ - documented,gitcore-tutorial | \ - documented,gitcvs-migration | \ - documented,gitdiffcore | \ - documented,gitglossary | \ - documented,githooks | \ - documented,gitrepository-layout | \ - documented,gittutorial | \ - documented,gittutorial-2 | \ + *,perf-citool | \ + *,perf-gui | \ + *,perf-help | \ + documented,perfattributes | \ + documented,perfignore | \ + documented,perfmodules | \ + documented,perfcli | \ + documented,perf-tools | \ + documented,perfcore-tutorial | \ + documented,perfcvs-migration | \ + documented,perfdiffcore | \ + documented,perfglossary | \ + documented,perfhooks | \ + documented,perfrepository-layout | \ + documented,perftutorial | \ + documented,perftutorial-2 | \ sentinel,not,matching,is,ok ) continue ;; \ esac; \ - case " $(ALL_PROGRAMS) $(BUILT_INS) git gitk " in \ + case " $(ALL_PROGRAMS) $(BUILT_INS) perf perfk " in \ *" $$cmd "*) ;; \ *) echo "removed but $$how: $$cmd" ;; \ esac; \ done ) | sort -### Make sure built-ins do not have dups and listed in git.c +### Make sure built-ins do not have dups and listed in perf.c # check-builtins:: ./check-builtins.sh diff --git a/Documentation/perf_counter/PERF-BUILD-OPTIONS b/Documentation/perf_counter/PERF-BUILD-OPTIONS new file mode 100644 index 0000000..46d8d6c --- /dev/null +++ b/Documentation/perf_counter/PERF-BUILD-OPTIONS @@ -0,0 +1,4 @@ +SHELL_PATH='/bin/sh' +TAR='tar' +NO_CURL='' +NO_PERL='' diff --git a/Documentation/perf_counter/PERF-CFLAGS b/Documentation/perf_counter/PERF-CFLAGS new file mode 100644 index 0000000..f24906c --- /dev/null +++ b/Documentation/perf_counter/PERF-CFLAGS @@ -0,0 +1 @@ +-g -O2 -Wall -DSHA1_HEADER='' : /home/mingo/bin:libexec/perf-core:share/perf-core/templates:/home/mingo diff --git a/Documentation/perf_counter/PERF-VERSION-FILE b/Documentation/perf_counter/PERF-VERSION-FILE new file mode 100644 index 0000000..328e244 --- /dev/null +++ b/Documentation/perf_counter/PERF-VERSION-FILE @@ -0,0 +1 @@ +PERF_VERSION = 0.0.1.PERF diff --git a/Documentation/perf_counter/PERF-VERSION-GEN b/Documentation/perf_counter/PERF-VERSION-GEN new file mode 100755 index 0000000..c561d15 --- /dev/null +++ b/Documentation/perf_counter/PERF-VERSION-GEN @@ -0,0 +1,42 @@ +#!/bin/sh + +GVF=PERF-VERSION-FILE +DEF_VER=v0.0.1.PERF + +LF=' +' + +# First see if there is a version file (included in release tarballs), +# then try git-describe, then default. +if test -f version +then + VN=$(cat version) || VN="$DEF_VER" +elif test -d .git -o -f .git && + VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + case "$VN" in + *$LF*) (exit 1) ;; + v[0-9]*) + git update-index -q --refresh + test -z "$(git diff-index --name-only HEAD --)" || + VN="$VN-dirty" ;; + esac +then + VN=$(echo "$VN" | sed -e 's/-/./g'); +else + VN="$DEF_VER" +fi + +VN=$(expr "$VN" : v*'\(.*\)') + +if test -r $GVF +then + VC=$(sed -e 's/^PERF_VERSION = //' <$GVF) +else + VC=unset +fi +test "$VN" = "$VC" || { + echo >&2 "PERF_VERSION = $VN" + echo "PERF_VERSION = $VN" >$GVF +} + + diff --git a/Documentation/perf_counter/abspath.c b/Documentation/perf_counter/abspath.c new file mode 100644 index 0000000..649f34f --- /dev/null +++ b/Documentation/perf_counter/abspath.c @@ -0,0 +1,117 @@ +#include "cache.h" + +/* + * Do not use this for inspecting *tracked* content. When path is a + * symlink to a directory, we do not want to say it is a directory when + * dealing with tracked content in the working tree. + */ +int is_directory(const char *path) +{ + struct stat st; + return (!stat(path, &st) && S_ISDIR(st.st_mode)); +} + +/* We allow "recursive" symbolic links. Only within reason, though. */ +#define MAXDEPTH 5 + +const char *make_absolute_path(const char *path) +{ + static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1]; + char cwd[1024] = ""; + int buf_index = 1, len; + + int depth = MAXDEPTH; + char *last_elem = NULL; + struct stat st; + + if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) + die ("Too long path: %.*s", 60, path); + + while (depth--) { + if (!is_directory(buf)) { + char *last_slash = strrchr(buf, '/'); + if (last_slash) { + *last_slash = '\0'; + last_elem = xstrdup(last_slash + 1); + } else { + last_elem = xstrdup(buf); + *buf = '\0'; + } + } + + if (*buf) { + if (!*cwd && !getcwd(cwd, sizeof(cwd))) + die ("Could not get current working directory"); + + if (chdir(buf)) + die ("Could not switch to '%s'", buf); + } + if (!getcwd(buf, PATH_MAX)) + die ("Could not get current working directory"); + + if (last_elem) { + int len = strlen(buf); + if (len + strlen(last_elem) + 2 > PATH_MAX) + die ("Too long path name: '%s/%s'", + buf, last_elem); + buf[len] = '/'; + strcpy(buf + len + 1, last_elem); + free(last_elem); + last_elem = NULL; + } + + if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) { + len = readlink(buf, next_buf, PATH_MAX); + if (len < 0) + die ("Invalid symlink: %s", buf); + if (PATH_MAX <= len) + die("symbolic link too long: %s", buf); + next_buf[len] = '\0'; + buf = next_buf; + buf_index = 1 - buf_index; + next_buf = bufs[buf_index]; + } else + break; + } + + if (*cwd && chdir(cwd)) + die ("Could not change back to '%s'", cwd); + + return buf; +} + +static const char *get_pwd_cwd(void) +{ + static char cwd[PATH_MAX + 1]; + char *pwd; + struct stat cwd_stat, pwd_stat; + if (getcwd(cwd, PATH_MAX) == NULL) + return NULL; + pwd = getenv("PWD"); + if (pwd && strcmp(pwd, cwd)) { + stat(cwd, &cwd_stat); + if (!stat(pwd, &pwd_stat) && + pwd_stat.st_dev == cwd_stat.st_dev && + pwd_stat.st_ino == cwd_stat.st_ino) { + strlcpy(cwd, pwd, PATH_MAX); + } + } + return cwd; +} + +const char *make_nonrelative_path(const char *path) +{ + static char buf[PATH_MAX + 1]; + + if (is_absolute_path(path)) { + if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) + die("Too long path: %.*s", 60, path); + } else { + const char *cwd = get_pwd_cwd(); + if (!cwd) + die("Cannot determine the current working directory"); + if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) + die("Too long path: %.*s", 60, path); + } + return buf; +} diff --git a/Documentation/perf_counter/alias.c b/Documentation/perf_counter/alias.c new file mode 100644 index 0000000..9b3dd2b --- /dev/null +++ b/Documentation/perf_counter/alias.c @@ -0,0 +1,77 @@ +#include "cache.h" + +static const char *alias_key; +static char *alias_val; + +static int alias_lookup_cb(const char *k, const char *v, void *cb) +{ + if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { + if (!v) + return config_error_nonbool(k); + alias_val = strdup(v); + return 0; + } + return 0; +} + +char *alias_lookup(const char *alias) +{ + alias_key = alias; + alias_val = NULL; + perf_config(alias_lookup_cb, NULL); + return alias_val; +} + +int split_cmdline(char *cmdline, const char ***argv) +{ + int src, dst, count = 0, size = 16; + char quoted = 0; + + *argv = malloc(sizeof(char*) * size); + + /* split alias_string */ + (*argv)[count++] = cmdline; + for (src = dst = 0; cmdline[src];) { + char c = cmdline[src]; + if (!quoted && isspace(c)) { + cmdline[dst++] = 0; + while (cmdline[++src] + && isspace(cmdline[src])) + ; /* skip */ + if (count >= size) { + size += 16; + *argv = realloc(*argv, sizeof(char*) * size); + } + (*argv)[count++] = cmdline + dst; + } else if (!quoted && (c == '\'' || c == '"')) { + quoted = c; + src++; + } else if (c == quoted) { + quoted = 0; + src++; + } else { + if (c == '\\' && quoted != '\'') { + src++; + c = cmdline[src]; + if (!c) { + free(*argv); + *argv = NULL; + return error("cmdline ends with \\"); + } + } + cmdline[dst++] = c; + src++; + } + } + + cmdline[dst] = 0; + + if (quoted) { + free(*argv); + *argv = NULL; + return error("unclosed quote"); + } + + return count; +} + diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c new file mode 100644 index 0000000..125fcc2 --- /dev/null +++ b/Documentation/perf_counter/builtin-help.c @@ -0,0 +1,463 @@ +/* + * builtin-help.c + * + * Builtin help command + */ +#include "cache.h" +#include "builtin.h" +#include "exec_cmd.h" +#include "common-cmds.h" +#include "parse-options.h" +#include "run-command.h" +#include "help.h" + +static struct man_viewer_list { + struct man_viewer_list *next; + char name[FLEX_ARRAY]; +} *man_viewer_list; + +static struct man_viewer_info_list { + struct man_viewer_info_list *next; + const char *info; + char name[FLEX_ARRAY]; +} *man_viewer_info_list; + +enum help_format { + HELP_FORMAT_MAN, + HELP_FORMAT_INFO, + HELP_FORMAT_WEB, +}; + +static int show_all = 0; +static enum help_format help_format = HELP_FORMAT_MAN; +static struct option builtin_help_options[] = { + OPT_BOOLEAN('a', "all", &show_all, "print all available commands"), + OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN), + OPT_SET_INT('w', "web", &help_format, "show manual in web browser", + HELP_FORMAT_WEB), + OPT_SET_INT('i', "info", &help_format, "show info page", + HELP_FORMAT_INFO), + OPT_END(), +}; + +static const char * const builtin_help_usage[] = { + "perf help [--all] [--man|--web|--info] [command]", + NULL +}; + +static enum help_format parse_help_format(const char *format) +{ + if (!strcmp(format, "man")) + return HELP_FORMAT_MAN; + if (!strcmp(format, "info")) + return HELP_FORMAT_INFO; + if (!strcmp(format, "web") || !strcmp(format, "html")) + return HELP_FORMAT_WEB; + die("unrecognized help format '%s'", format); +} + +static const char *get_man_viewer_info(const char *name) +{ + struct man_viewer_info_list *viewer; + + for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) + { + if (!strcasecmp(name, viewer->name)) + return viewer->info; + } + return NULL; +} + +static int check_emacsclient_version(void) +{ + struct strbuf buffer = STRBUF_INIT; + struct child_process ec_process; + const char *argv_ec[] = { "emacsclient", "--version", NULL }; + int version; + + /* emacsclient prints its version number on stderr */ + memset(&ec_process, 0, sizeof(ec_process)); + ec_process.argv = argv_ec; + ec_process.err = -1; + ec_process.stdout_to_stderr = 1; + if (start_command(&ec_process)) { + fprintf(stderr, "Failed to start emacsclient.\n"); + return -1; + } + strbuf_read(&buffer, ec_process.err, 20); + close(ec_process.err); + + /* + * Don't bother checking return value, because "emacsclient --version" + * seems to always exits with code 1. + */ + finish_command(&ec_process); + + if (prefixcmp(buffer.buf, "emacsclient")) { + fprintf(stderr, "Failed to parse emacsclient version.\n"); + strbuf_release(&buffer); + return -1; + } + + strbuf_remove(&buffer, 0, strlen("emacsclient")); + version = atoi(buffer.buf); + + if (version < 22) { + fprintf(stderr, + "emacsclient version '%d' too old (< 22).\n", + version); + strbuf_release(&buffer); + return -1; + } + + strbuf_release(&buffer); + return 0; +} + +static void exec_woman_emacs(const char* path, const char *page) +{ + if (!check_emacsclient_version()) { + /* This works only with emacsclient version >= 22. */ + struct strbuf man_page = STRBUF_INIT; + + if (!path) + path = "emacsclient"; + strbuf_addf(&man_page, "(woman \"%s\")", page); + execlp(path, "emacsclient", "-e", man_page.buf, NULL); + warning("failed to exec '%s': %s", path, strerror(errno)); + } +} + +static void exec_man_konqueror(const char* path, const char *page) +{ + const char *display = getenv("DISPLAY"); + if (display && *display) { + struct strbuf man_page = STRBUF_INIT; + const char *filename = "kfmclient"; + + /* It's simpler to launch konqueror using kfmclient. */ + if (path) { + const char *file = strrchr(path, '/'); + if (file && !strcmp(file + 1, "konqueror")) { + char *new = strdup(path); + char *dest = strrchr(new, '/'); + + /* strlen("konqueror") == strlen("kfmclient") */ + strcpy(dest + 1, "kfmclient"); + path = new; + } + if (file) + filename = file; + } else + path = "kfmclient"; + strbuf_addf(&man_page, "man:%s(1)", page); + execlp(path, filename, "newTab", man_page.buf, NULL); + warning("failed to exec '%s': %s", path, strerror(errno)); + } +} + +static void exec_man_man(const char* path, const char *page) +{ + if (!path) + path = "man"; + execlp(path, "man", page, NULL); + warning("failed to exec '%s': %s", path, strerror(errno)); +} + +static void exec_man_cmd(const char *cmd, const char *page) +{ + struct strbuf shell_cmd = STRBUF_INIT; + strbuf_addf(&shell_cmd, "%s %s", cmd, page); + execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); + warning("failed to exec '%s': %s", cmd, strerror(errno)); +} + +static void add_man_viewer(const char *name) +{ + struct man_viewer_list **p = &man_viewer_list; + size_t len = strlen(name); + + while (*p) + p = &((*p)->next); + *p = calloc(1, (sizeof(**p) + len + 1)); + strncpy((*p)->name, name, len); +} + +static int supported_man_viewer(const char *name, size_t len) +{ + return (!strncasecmp("man", name, len) || + !strncasecmp("woman", name, len) || + !strncasecmp("konqueror", name, len)); +} + +static void do_add_man_viewer_info(const char *name, + size_t len, + const char *value) +{ + struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); + + strncpy(new->name, name, len); + new->info = strdup(value); + new->next = man_viewer_info_list; + man_viewer_info_list = new; +} + +static int add_man_viewer_path(const char *name, + size_t len, + const char *value) +{ + if (supported_man_viewer(name, len)) + do_add_man_viewer_info(name, len, value); + else + warning("'%s': path for unsupported man viewer.\n" + "Please consider using 'man..cmd' instead.", + name); + + return 0; +} + +static int add_man_viewer_cmd(const char *name, + size_t len, + const char *value) +{ + if (supported_man_viewer(name, len)) + warning("'%s': cmd for supported man viewer.\n" + "Please consider using 'man..path' instead.", + name); + else + do_add_man_viewer_info(name, len, value); + + return 0; +} + +static int add_man_viewer_info(const char *var, const char *value) +{ + const char *name = var + 4; + const char *subkey = strrchr(name, '.'); + + if (!subkey) + return error("Config with no key for man viewer: %s", name); + + if (!strcmp(subkey, ".path")) { + if (!value) + return config_error_nonbool(var); + return add_man_viewer_path(name, subkey - name, value); + } + if (!strcmp(subkey, ".cmd")) { + if (!value) + return config_error_nonbool(var); + return add_man_viewer_cmd(name, subkey - name, value); + } + + warning("'%s': unsupported man viewer sub key.", subkey); + return 0; +} + +static int perf_help_config(const char *var, const char *value, void *cb) +{ + if (!strcmp(var, "help.format")) { + if (!value) + return config_error_nonbool(var); + help_format = parse_help_format(value); + return 0; + } + if (!strcmp(var, "man.viewer")) { + if (!value) + return config_error_nonbool(var); + add_man_viewer(value); + return 0; + } + if (!prefixcmp(var, "man.")) + return add_man_viewer_info(var, value); + + return perf_default_config(var, value, cb); +} + +static struct cmdnames main_cmds, other_cmds; + +void list_common_cmds_help(void) +{ + int i, longest = 0; + + for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { + if (longest < strlen(common_cmds[i].name)) + longest = strlen(common_cmds[i].name); + } + + puts("The most commonly used perf commands are:"); + for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { + printf(" %s ", common_cmds[i].name); + mput_char(' ', longest - strlen(common_cmds[i].name)); + puts(common_cmds[i].help); + } +} + +static int is_perf_command(const char *s) +{ + return is_in_cmdlist(&main_cmds, s) || + is_in_cmdlist(&other_cmds, s); +} + +static const char *prepend(const char *prefix, const char *cmd) +{ + size_t pre_len = strlen(prefix); + size_t cmd_len = strlen(cmd); + char *p = malloc(pre_len + cmd_len + 1); + memcpy(p, prefix, pre_len); + strcpy(p + pre_len, cmd); + return p; +} + +static const char *cmd_to_page(const char *perf_cmd) +{ + if (!perf_cmd) + return "perf"; + else if (!prefixcmp(perf_cmd, "perf")) + return perf_cmd; + else if (is_perf_command(perf_cmd)) + return prepend("perf-", perf_cmd); + else + return prepend("perf", perf_cmd); +} + +static void setup_man_path(void) +{ + struct strbuf new_path = STRBUF_INIT; + const char *old_path = getenv("MANPATH"); + + /* We should always put ':' after our path. If there is no + * old_path, the ':' at the end will let 'man' to try + * system-wide paths after ours to find the manual page. If + * there is old_path, we need ':' as delimiter. */ + strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); + strbuf_addch(&new_path, ':'); + if (old_path) + strbuf_addstr(&new_path, old_path); + + setenv("MANPATH", new_path.buf, 1); + + strbuf_release(&new_path); +} + +static void exec_viewer(const char *name, const char *page) +{ + const char *info = get_man_viewer_info(name); + + if (!strcasecmp(name, "man")) + exec_man_man(info, page); + else if (!strcasecmp(name, "woman")) + exec_woman_emacs(info, page); + else if (!strcasecmp(name, "konqueror")) + exec_man_konqueror(info, page); + else if (info) + exec_man_cmd(info, page); + else + warning("'%s': unknown man viewer.", name); +} + +static void show_man_page(const char *perf_cmd) +{ + struct man_viewer_list *viewer; + const char *page = cmd_to_page(perf_cmd); + const char *fallback = getenv("PERF_MAN_VIEWER"); + + setup_man_path(); + for (viewer = man_viewer_list; viewer; viewer = viewer->next) + { + exec_viewer(viewer->name, page); /* will return when unable */ + } + if (fallback) + exec_viewer(fallback, page); + exec_viewer("man", page); + die("no man viewer handled the request"); +} + +static void show_info_page(const char *perf_cmd) +{ + const char *page = cmd_to_page(perf_cmd); + setenv("INFOPATH", system_path(PERF_INFO_PATH), 1); + execlp("info", "info", "perfman", page, NULL); +} + +static void get_html_page_path(struct strbuf *page_path, const char *page) +{ + struct stat st; + const char *html_path = system_path(PERF_HTML_PATH); + + /* Check that we have a perf documentation directory. */ + if (stat(mkpath("%s/perf.html", html_path), &st) + || !S_ISREG(st.st_mode)) + die("'%s': not a documentation directory.", html_path); + + strbuf_init(page_path, 0); + strbuf_addf(page_path, "%s/%s.html", html_path, page); +} + +/* + * If open_html is not defined in a platform-specific way (see for + * example compat/mingw.h), we use the script web--browse to display + * HTML. + */ +#ifndef open_html +void open_html(const char *path) +{ + execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL); +} +#endif + +static void show_html_page(const char *perf_cmd) +{ + const char *page = cmd_to_page(perf_cmd); + struct strbuf page_path; /* it leaks but we exec bellow */ + + get_html_page_path(&page_path, page); + + open_html(page_path.buf); +} + +int cmd_help(int argc, const char **argv, const char *prefix) +{ + int nonperf; + const char *alias; + load_command_list("perf-", &main_cmds, &other_cmds); + + /* setup_perf_directory_gently(&nonperf); */ + perf_config(perf_help_config, NULL); + + argc = parse_options(argc, argv, builtin_help_options, + builtin_help_usage, 0); + + if (show_all) { + printf("usage: %s\n\n", perf_usage_string); + list_commands("perf commands", &main_cmds, &other_cmds); + printf("%s\n", perf_more_info_string); + return 0; + } + + if (!argv[0]) { + printf("usage: %s\n\n", perf_usage_string); + list_common_cmds_help(); + printf("\n%s\n", perf_more_info_string); + return 0; + } + + alias = alias_lookup(argv[0]); + if (alias && !is_perf_command(argv[0])) { + printf("`perf %s' is aliased to `%s'\n", argv[0], alias); + return 0; + } + + switch (help_format) { + case HELP_FORMAT_MAN: + show_man_page(argv[0]); + break; + case HELP_FORMAT_INFO: + show_info_page(argv[0]); + break; + case HELP_FORMAT_WEB: + show_html_page(argv[0]); + break; + } + + return 0; +} diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c new file mode 100644 index 0000000..9d2c769 --- /dev/null +++ b/Documentation/perf_counter/builtin-top.c @@ -0,0 +1,1411 @@ +/* + * kerneltop.c: show top kernel functions - performance counters showcase + + Build with: + + cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt + + Sample output: + +------------------------------------------------------------------------------ + KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) +------------------------------------------------------------------------------ + + weight RIP kernel function + ______ ________________ _______________ + + 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev + 33.00 - ffffffff804cb740 : sock_alloc_send_skb + 31.26 - ffffffff804ce808 : skb_push + 22.43 - ffffffff80510004 : tcp_established_options + 19.00 - ffffffff8027d250 : find_get_page + 15.76 - ffffffff804e4fc9 : eth_type_trans + 15.20 - ffffffff804d8baa : dst_release + 14.86 - ffffffff804cf5d8 : skb_release_head_state + 14.00 - ffffffff802217d5 : read_hpet + 12.00 - ffffffff804ffb7f : __ip_local_out + 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish + 8.54 - ffffffff805001a3 : ip_queue_xmit + */ + +/* + * perfstat: /usr/bin/time -alike performance counter statistics utility + + It summarizes the counter events of all tasks (and child tasks), + covering all CPUs that the command (or workload) executes on. + It only counts the per-task events of the workload started, + independent of how many other tasks run on those CPUs. + + Sample output: + + $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null + + Performance counter stats for 'ls': + + 163516953 instructions + 2295 cache-misses + 2855182 branch-misses + */ + + /* + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar + * + * Improvements and fixes by: + * + * Arjan van de Ven + * Yanmin Zhang + * Wu Fengguang + * Mike Galbraith + * Paul Mackerras + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "util.h" + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../include/linux/perf_counter.h" + + +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +#ifdef __x86_64__ +#define __NR_perf_counter_open 295 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __i386__ +#define __NR_perf_counter_open 333 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#define rmb() asm volatile ("sync" ::: "memory") +#define cpu_relax() asm volatile ("" ::: "memory"); +#endif + +#define unlikely(x) __builtin_expect(!!(x), 0) +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +asmlinkage int sys_perf_counter_open( + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags) +{ + return syscall( + __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); +} + +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) + +static int run_perfstat = 0; +static int system_wide = 0; + +static int nr_counters = 0; +static __u64 event_id[MAX_COUNTERS] = { + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), + + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), +}; +static int default_interval = 100000; +static int event_count[MAX_COUNTERS]; +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static __u64 count_filter = 100; + +static int tid = -1; +static int profile_cpu = -1; +static int nr_cpus = 0; +static int nmi = 1; +static unsigned int realtime_prio = 0; +static int group = 0; +static unsigned int page_size; +static unsigned int mmap_pages = 16; +static int use_mmap = 0; +static int use_munmap = 0; + +static char *vmlinux; + +static char *sym_filter; +static unsigned long filter_start; +static unsigned long filter_end; + +static int delay_secs = 2; +static int zero; +static int dump_symtab; + +static int scale; + +struct source_line { + uint64_t EIP; + unsigned long count; + char *line; + struct source_line *next; +}; + +static struct source_line *lines; +static struct source_line **lines_tail; + +const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +static char *hw_event_names[] = { + "CPU cycles", + "instructions", + "cache references", + "cache misses", + "branches", + "branch misses", + "bus cycles", +}; + +static char *sw_event_names[] = { + "cpu clock ticks", + "task clock ticks", + "pagefaults", + "context switches", + "CPU migrations", + "minor faults", + "major faults", +}; + +struct event_symbol { + __u64 event; + char *symbol; +}; + +static struct event_symbol event_symbols[] = { + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, + + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, +}; + +#define __PERF_COUNTER_FIELD(config, name) \ + ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) +#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) +#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) +#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) + +static void display_events_help(void) +{ + unsigned int i; + __u64 e; + + printf( + " -e EVENT --event=EVENT # symbolic-name abbreviations"); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + int type, id; + + e = event_symbols[i].event; + type = PERF_COUNTER_TYPE(e); + id = PERF_COUNTER_ID(e); + + printf("\n %d:%d: %-20s", + type, id, event_symbols[i].symbol); + } + + printf("\n" + " rNNN: raw PMU events (eventsel+umask)\n\n"); +} + +static void display_perfstat_help(void) +{ + printf( + "Usage: perfstat [] \n\n" + "PerfStat Options (up to %d event types can be specified):\n\n", + MAX_COUNTERS); + + display_events_help(); + + printf( + " -l # scale counter values\n" + " -a # system-wide collection\n"); + exit(0); +} + +static void display_help(void) +{ + if (run_perfstat) + return display_perfstat_help(); + + printf( + "Usage: kerneltop []\n" + " Or: kerneltop -S [] COMMAND [ARGS]\n\n" + "KernelTop Options (up to %d event types can be specified at once):\n\n", + MAX_COUNTERS); + + display_events_help(); + + printf( + " -S --stat # perfstat COMMAND\n" + " -a # system-wide collection (for perfstat)\n\n" + " -c CNT --count=CNT # event period to sample\n\n" + " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" + " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" + " -l # show scale factor for RR events\n" + " -d delay --delay= # sampling/display delay [default: 2]\n" + " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" + " -r prio --realtime= # event acquisition runs with SCHED_FIFO policy\n" + " -s symbol --symbol= # function to be showed annotated one-shot\n" + " -x path --vmlinux= # the vmlinux binary, required for -s use\n" + " -z --zero # zero counts after display\n" + " -D --dump_symtab # dump symbol table to stderr on startup\n" + " -m pages --mmap_pages= # number of mmap data pages\n" + " -M --mmap_info # print mmap info stream\n" + " -U --munmap_info # print munmap info stream\n" + ); + + exit(0); +} + +static char *event_name(int ctr) +{ + __u64 config = event_id[ctr]; + int type = PERF_COUNTER_TYPE(config); + int id = PERF_COUNTER_ID(config); + static char buf[32]; + + if (PERF_COUNTER_RAW(config)) { + sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); + return buf; + } + + switch (type) { + case PERF_TYPE_HARDWARE: + if (id < PERF_HW_EVENTS_MAX) + return hw_event_names[id]; + return "unknown-hardware"; + + case PERF_TYPE_SOFTWARE: + if (id < PERF_SW_EVENTS_MAX) + return sw_event_names[id]; + return "unknown-software"; + + default: + break; + } + + return "unknown"; +} + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static __u64 match_event_symbols(char *str) +{ + __u64 config, id; + int type; + unsigned int i; + + if (sscanf(str, "r%llx", &config) == 1) + return config | PERF_COUNTER_RAW_MASK; + + if (sscanf(str, "%d:%llu", &type, &id) == 2) + return EID(type, id); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return ~0ULL; +} + +static int parse_events(char *str) +{ + __u64 config; + +again: + if (nr_counters == MAX_COUNTERS) + return -1; + + config = match_event_symbols(str); + if (config == ~0ULL) + return -1; + + event_id[nr_counters] = config; + nr_counters++; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } + + return 0; +} + + +/* + * perfstat + */ + +char fault_here[1000000]; + +static void create_perfstat_counter(int counter) +{ + struct perf_counter_hw_event hw_event; + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.config = event_id[counter]; + hw_event.record_type = 0; + hw_event.nmi = 0; + if (scale) + hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[cpu][counter], strerror(errno)); + exit(-1); + } + } + } else { + hw_event.inherit = 1; + hw_event.disabled = 1; + + fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); + if (fd[0][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[0][counter], strerror(errno)); + exit(-1); + } + } +} + +int do_perfstat(int argc, char *argv[]) +{ + unsigned long long t0, t1; + int counter; + ssize_t res; + int status; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perfstat_counter(counter); + + argc -= optind; + argv += optind; + + if (!argc) + display_help(); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + if (!pid) { + if (execvp(argv[0], argv)) { + perror(argv[0]); + exit(-1); + } + } + while (wait(&status) >= 0) + ; + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s\':\n", + argv[0]); + fprintf(stderr, "\n"); + + for (counter = 0; counter < nr_counters; counter++) { + int cpu, nv; + __u64 count[3], single_count[3]; + int scaled; + + count[0] = count[1] = count[2] = 0; + nv = scale ? 3 : 1; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + res = read(fd[cpu][counter], + single_count, nv * sizeof(__u64)); + assert(res == nv * sizeof(__u64)); + + count[0] += single_count[0]; + if (scale) { + count[1] += single_count[1]; + count[2] += single_count[2]; + } + } + + scaled = 0; + if (scale) { + if (count[2] == 0) { + fprintf(stderr, " %14s %-20s\n", + "", event_name(counter)); + continue; + } + if (count[2] < count[1]) { + scaled = 1; + count[0] = (unsigned long long) + ((double)count[0] * count[1] / count[2] + 0.5); + } + } + + if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || + event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { + + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s (msecs)", + msecs, event_name(counter)); + } else { + fprintf(stderr, " %14Ld %-20s (events)", + count[0], event_name(counter)); + } + if (scaled) + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", + (double)(t1-t0)/1e6); + fprintf(stderr, "\n"); + + return 0; +} + +/* + * Symbols + */ + +static uint64_t min_ip; +static uint64_t max_ip = -1ll; + +struct sym_entry { + unsigned long long addr; + char *sym; + unsigned long count[MAX_COUNTERS]; + int skip; + struct source_line *source; +}; + +#define MAX_SYMS 100000 + +static int sym_table_count; + +struct sym_entry *sym_filter_entry; + +static struct sym_entry sym_table[MAX_SYMS]; + +static void show_details(struct sym_entry *sym); + +/* + * Ordering weight: count-1 * count-2 * ... / count-n + */ +static double sym_weight(const struct sym_entry *sym) +{ + double weight; + int counter; + + weight = sym->count[0]; + + for (counter = 1; counter < nr_counters-1; counter++) + weight *= sym->count[counter]; + + weight /= (sym->count[counter] + 1); + + return weight; +} + +static int compare(const void *__sym1, const void *__sym2) +{ + const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; + + return sym_weight(sym1) < sym_weight(sym2); +} + +static long events; +static long userspace_events; +static const char CONSOLE_CLEAR[] = ""; + +static struct sym_entry tmp[MAX_SYMS]; + +static void print_sym_table(void) +{ + int i, printed; + int counter; + float events_per_sec = events/delay_secs; + float kevents_per_sec = (events-userspace_events)/delay_secs; + float sum_kevents = 0.0; + + events = userspace_events = 0; + memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); + qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); + + for (i = 0; i < sym_table_count && tmp[i].count[0]; i++) + sum_kevents += tmp[i].count[0]; + + write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); + + printf( +"------------------------------------------------------------------------------\n"); + printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ", + events_per_sec, + 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), + nmi ? "NMI" : "IRQ"); + + if (nr_counters == 1) + printf("%d ", event_count[0]); + + for (counter = 0; counter < nr_counters; counter++) { + if (counter) + printf("/"); + + printf("%s", event_name(counter)); + } + + printf( "], "); + + if (tid != -1) + printf(" (tid: %d", tid); + else + printf(" (all"); + + if (profile_cpu != -1) + printf(", cpu: %d)\n", profile_cpu); + else { + if (tid != -1) + printf(")\n"); + else + printf(", %d CPUs)\n", nr_cpus); + } + + printf("------------------------------------------------------------------------------\n\n"); + + if (nr_counters == 1) + printf(" events pcnt"); + else + printf(" weight events pcnt"); + + printf(" RIP kernel function\n" + " ______ ______ _____ ________________ _______________\n\n" + ); + + for (i = 0, printed = 0; i < sym_table_count; i++) { + float pcnt; + int count; + + if (printed <= 18 && tmp[i].count[0] >= count_filter) { + pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents)); + + if (nr_counters == 1) + printf("%19.2f - %4.1f%% - %016llx : %s\n", + sym_weight(tmp + i), + pcnt, tmp[i].addr, tmp[i].sym); + else + printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n", + sym_weight(tmp + i), + tmp[i].count[0], + pcnt, tmp[i].addr, tmp[i].sym); + printed++; + } + /* + * Add decay to the counts: + */ + for (count = 0; count < nr_counters; count++) + sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; + } + + if (sym_filter_entry) + show_details(sym_filter_entry); + + { + struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; + + if (poll(&stdin_poll, 1, 0) == 1) { + printf("key pressed - exiting.\n"); + exit(0); + } + } +} + +static void *display_thread(void *arg) +{ + printf("KernelTop refresh period: %d seconds\n", delay_secs); + + while (!sleep(delay_secs)) + print_sym_table(); + + return NULL; +} + +static int read_symbol(FILE *in, struct sym_entry *s) +{ + static int filter_match = 0; + char *sym, stype; + char str[500]; + int rc, pos; + + rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); + if (rc == EOF) + return -1; + + assert(rc == 3); + + /* skip until end of line: */ + pos = strlen(str); + do { + rc = fgetc(in); + if (rc == '\n' || rc == EOF || pos >= 499) + break; + str[pos] = rc; + pos++; + } while (1); + str[pos] = 0; + + sym = str; + + /* Filter out known duplicates and non-text symbols. */ + if (!strcmp(sym, "_text")) + return 1; + if (!min_ip && !strcmp(sym, "_stext")) + return 1; + if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) + return 1; + if (stype != 'T' && stype != 't') + return 1; + if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) + return 1; + if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) + return 1; + + s->sym = malloc(strlen(str)); + assert(s->sym); + + strcpy((char *)s->sym, str); + s->skip = 0; + + /* Tag events to be skipped. */ + if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) + s->skip = 1; + else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) + s->skip = 1; + else if (!strcmp("mwait_idle", s->sym)) + s->skip = 1; + + if (filter_match == 1) { + filter_end = s->addr; + filter_match = -1; + if (filter_end - filter_start > 10000) { + printf("hm, too large filter symbol <%s> - skipping.\n", + sym_filter); + printf("symbol filter start: %016lx\n", filter_start); + printf(" end: %016lx\n", filter_end); + filter_end = filter_start = 0; + sym_filter = NULL; + sleep(1); + } + } + if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { + filter_match = 1; + filter_start = s->addr; + } + + return 0; +} + +int compare_addr(const void *__sym1, const void *__sym2) +{ + const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; + + return sym1->addr > sym2->addr; +} + +static void sort_symbol_table(void) +{ + int i, dups; + + do { + qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); + for (i = 0, dups = 0; i < sym_table_count; i++) { + if (sym_table[i].addr == sym_table[i+1].addr) { + sym_table[i+1].addr = -1ll; + dups++; + } + } + sym_table_count -= dups; + } while(dups); +} + +static void parse_symbols(void) +{ + struct sym_entry *last; + + FILE *kallsyms = fopen("/proc/kallsyms", "r"); + + if (!kallsyms) { + printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); + exit(-1); + } + + while (!feof(kallsyms)) { + if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { + sym_table_count++; + assert(sym_table_count <= MAX_SYMS); + } + } + + sort_symbol_table(); + min_ip = sym_table[0].addr; + max_ip = sym_table[sym_table_count-1].addr; + last = sym_table + sym_table_count++; + + last->addr = -1ll; + last->sym = ""; + + if (filter_end) { + int count; + for (count=0; count < sym_table_count; count ++) { + if (!strcmp(sym_table[count].sym, sym_filter)) { + sym_filter_entry = &sym_table[count]; + break; + } + } + } + if (dump_symtab) { + int i; + + for (i = 0; i < sym_table_count; i++) + fprintf(stderr, "%llx %s\n", + sym_table[i].addr, sym_table[i].sym); + } +} + +/* + * Source lines + */ + +static void parse_vmlinux(char *filename) +{ + FILE *file; + char command[PATH_MAX*2]; + if (!filename) + return; + + sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); + + file = popen(command, "r"); + if (!file) + return; + + lines_tail = &lines; + while (!feof(file)) { + struct source_line *src; + size_t dummy = 0; + char *c; + + src = malloc(sizeof(struct source_line)); + assert(src != NULL); + memset(src, 0, sizeof(struct source_line)); + + if (getline(&src->line, &dummy, file) < 0) + break; + if (!src->line) + break; + + c = strchr(src->line, '\n'); + if (c) + *c = 0; + + src->next = NULL; + *lines_tail = src; + lines_tail = &src->next; + + if (strlen(src->line)>8 && src->line[8] == ':') + src->EIP = strtoull(src->line, NULL, 16); + if (strlen(src->line)>8 && src->line[16] == ':') + src->EIP = strtoull(src->line, NULL, 16); + } + pclose(file); +} + +static void record_precise_ip(uint64_t ip) +{ + struct source_line *line; + + for (line = lines; line; line = line->next) { + if (line->EIP == ip) + line->count++; + if (line->EIP > ip) + break; + } +} + +static void lookup_sym_in_vmlinux(struct sym_entry *sym) +{ + struct source_line *line; + char pattern[PATH_MAX]; + sprintf(pattern, "<%s>:", sym->sym); + + for (line = lines; line; line = line->next) { + if (strstr(line->line, pattern)) { + sym->source = line; + break; + } + } +} + +static void show_lines(struct source_line *line_queue, int line_queue_count) +{ + int i; + struct source_line *line; + + line = line_queue; + for (i = 0; i < line_queue_count; i++) { + printf("%8li\t%s\n", line->count, line->line); + line = line->next; + } +} + +#define TRACE_COUNT 3 + +static void show_details(struct sym_entry *sym) +{ + struct source_line *line; + struct source_line *line_queue = NULL; + int displayed = 0; + int line_queue_count = 0; + + if (!sym->source) + lookup_sym_in_vmlinux(sym); + if (!sym->source) + return; + + printf("Showing details for %s\n", sym->sym); + + line = sym->source; + while (line) { + if (displayed && strstr(line->line, ">:")) + break; + + if (!line_queue_count) + line_queue = line; + line_queue_count ++; + + if (line->count >= count_filter) { + show_lines(line_queue, line_queue_count); + line_queue_count = 0; + line_queue = NULL; + } else if (line_queue_count > TRACE_COUNT) { + line_queue = line_queue->next; + line_queue_count --; + } + + line->count = 0; + displayed++; + if (displayed > 300) + break; + line = line->next; + } +} + +/* + * Binary search in the histogram table and record the hit: + */ +static void record_ip(uint64_t ip, int counter) +{ + int left_idx, middle_idx, right_idx, idx; + unsigned long left, middle, right; + + record_precise_ip(ip); + + left_idx = 0; + right_idx = sym_table_count-1; + assert(ip <= max_ip && ip >= min_ip); + + while (left_idx + 1 < right_idx) { + middle_idx = (left_idx + right_idx) / 2; + + left = sym_table[ left_idx].addr; + middle = sym_table[middle_idx].addr; + right = sym_table[ right_idx].addr; + + if (!(left <= middle && middle <= right)) { + printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); + printf("%d %d %d\n", left_idx, middle_idx, right_idx); + } + assert(left <= middle && middle <= right); + if (!(left <= ip && ip <= right)) { + printf(" left: %016lx\n", left); + printf(" ip: %016lx\n", (unsigned long)ip); + printf("right: %016lx\n", right); + } + assert(left <= ip && ip <= right); + /* + * [ left .... target .... middle .... right ] + * => right := middle + */ + if (ip < middle) { + right_idx = middle_idx; + continue; + } + /* + * [ left .... middle ... target ... right ] + * => left := middle + */ + left_idx = middle_idx; + } + + idx = left_idx; + + if (!sym_table[idx].skip) + sym_table[idx].count[counter]++; + else events--; +} + +static void process_event(uint64_t ip, int counter) +{ + events++; + + if (ip < min_ip || ip > max_ip) { + userspace_events++; + return; + } + + record_ip(ip, counter); +} + +static void process_options(int argc, char *argv[]) +{ + int error = 0, counter; + + if (strstr(argv[0], "perfstat")) + run_perfstat = 1; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"count", required_argument, NULL, 'c'}, + {"cpu", required_argument, NULL, 'C'}, + {"delay", required_argument, NULL, 'd'}, + {"dump_symtab", no_argument, NULL, 'D'}, + {"event", required_argument, NULL, 'e'}, + {"filter", required_argument, NULL, 'f'}, + {"group", required_argument, NULL, 'g'}, + {"help", no_argument, NULL, 'h'}, + {"nmi", required_argument, NULL, 'n'}, + {"mmap_info", no_argument, NULL, 'M'}, + {"mmap_pages", required_argument, NULL, 'm'}, + {"munmap_info", no_argument, NULL, 'U'}, + {"pid", required_argument, NULL, 'p'}, + {"realtime", required_argument, NULL, 'r'}, + {"scale", no_argument, NULL, 'l'}, + {"symbol", required_argument, NULL, 's'}, + {"stat", no_argument, NULL, 'S'}, + {"vmlinux", required_argument, NULL, 'x'}, + {"zero", no_argument, NULL, 'z'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'a': system_wide = 1; break; + case 'c': default_interval = atoi(optarg); break; + case 'C': + /* CPU and PID are mutually exclusive */ + if (tid != -1) { + printf("WARNING: CPU switch overriding PID\n"); + sleep(1); + tid = -1; + } + profile_cpu = atoi(optarg); break; + case 'd': delay_secs = atoi(optarg); break; + case 'D': dump_symtab = 1; break; + + case 'e': error = parse_events(optarg); break; + + case 'f': count_filter = atoi(optarg); break; + case 'g': group = atoi(optarg); break; + case 'h': display_help(); break; + case 'l': scale = 1; break; + case 'n': nmi = atoi(optarg); break; + case 'p': + /* CPU and PID are mutually exclusive */ + if (profile_cpu != -1) { + printf("WARNING: PID switch overriding CPU\n"); + sleep(1); + profile_cpu = -1; + } + tid = atoi(optarg); break; + case 'r': realtime_prio = atoi(optarg); break; + case 's': sym_filter = strdup(optarg); break; + case 'S': run_perfstat = 1; break; + case 'x': vmlinux = strdup(optarg); break; + case 'z': zero = 1; break; + case 'm': mmap_pages = atoi(optarg); break; + case 'M': use_mmap = 1; break; + case 'U': use_munmap = 1; break; + default: error = 1; break; + } + } + if (error) + display_help(); + + if (!nr_counters) { + if (run_perfstat) + nr_counters = 8; + else { + nr_counters = 1; + event_id[0] = 0; + } + } + + for (counter = 0; counter < nr_counters; counter++) { + if (event_count[counter]) + continue; + + event_count[counter] = default_interval; + } +} + +struct mmap_data { + int counter; + void *base; + unsigned int mask; + unsigned int prev; +}; + +static unsigned int mmap_read_head(struct mmap_data *md) +{ + struct perf_counter_mmap_page *pc = md->base; + int head; + + head = pc->data_head; + rmb(); + + return head; +} + +struct timeval last_read, this_read; + +static void mmap_read(struct mmap_data *md) +{ + unsigned int head = mmap_read_head(md); + unsigned int old = md->prev; + unsigned char *data = md->base + page_size; + int diff; + + gettimeofday(&this_read, NULL); + + /* + * If we're further behind than half the buffer, there's a chance + * the writer will bite our tail and screw up the events under us. + * + * If we somehow ended up ahead of the head, we got messed up. + * + * In either case, truncate and restart at head. + */ + diff = head - old; + if (diff > md->mask / 2 || diff < 0) { + struct timeval iv; + unsigned long msecs; + + timersub(&this_read, &last_read, &iv); + msecs = iv.tv_sec*1000 + iv.tv_usec/1000; + + fprintf(stderr, "WARNING: failed to keep up with mmap data." + " Last read %lu msecs ago.\n", msecs); + + /* + * head points to a known good entry, start there. + */ + old = head; + } + + last_read = this_read; + + for (; old != head;) { + struct ip_event { + struct perf_event_header header; + __u64 ip; + __u32 pid, tid; + }; + struct mmap_event { + struct perf_event_header header; + __u32 pid, tid; + __u64 start; + __u64 len; + __u64 pgoff; + char filename[PATH_MAX]; + }; + + typedef union event_union { + struct perf_event_header header; + struct ip_event ip; + struct mmap_event mmap; + } event_t; + + event_t *event = (event_t *)&data[old & md->mask]; + + event_t event_copy; + + unsigned int size = event->header.size; + + /* + * Event straddles the mmap boundary -- header should always + * be inside due to u64 alignment of output. + */ + if ((old & md->mask) + size != ((old + size) & md->mask)) { + unsigned int offset = old; + unsigned int len = min(sizeof(*event), size), cpy; + void *dst = &event_copy; + + do { + cpy = min(md->mask + 1 - (offset & md->mask), len); + memcpy(dst, &data[offset & md->mask], cpy); + offset += cpy; + dst += cpy; + len -= cpy; + } while (len); + + event = &event_copy; + } + + old += size; + + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { + if (event->header.type & PERF_RECORD_IP) + process_event(event->ip.ip, md->counter); + } else { + switch (event->header.type) { + case PERF_EVENT_MMAP: + case PERF_EVENT_MUNMAP: + printf("%s: %Lu %Lu %Lu %s\n", + event->header.type == PERF_EVENT_MMAP + ? "mmap" : "munmap", + event->mmap.start, + event->mmap.len, + event->mmap.pgoff, + event->mmap.filename); + break; + } + } + } + + md->prev = old; +} + +int cmd_top(int argc, const char **argv, const char *prefix) +{ + struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; + struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; + struct perf_counter_hw_event hw_event; + pthread_t thread; + int i, counter, group_fd, nr_poll = 0; + unsigned int cpu; + int ret; + + page_size = sysconf(_SC_PAGE_SIZE); + + process_options(argc, argv); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + if (run_perfstat) + return do_perfstat(argc, argv); + + if (tid != -1 || profile_cpu != -1) + nr_cpus = 1; + + parse_symbols(); + if (vmlinux && sym_filter_entry) + parse_vmlinux(vmlinux); + + for (i = 0; i < nr_cpus; i++) { + group_fd = -1; + for (counter = 0; counter < nr_counters; counter++) { + + cpu = profile_cpu; + if (tid == -1 && profile_cpu == -1) + cpu = i; + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.config = event_id[counter]; + hw_event.irq_period = event_count[counter]; + hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; + hw_event.nmi = nmi; + hw_event.mmap = use_mmap; + hw_event.munmap = use_munmap; + + fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); + if (fd[i][counter] < 0) { + int err = errno; + printf("kerneltop error: syscall returned with %d (%s)\n", + fd[i][counter], strerror(err)); + if (err == EPERM) + printf("Are you root?\n"); + exit(-1); + } + assert(fd[i][counter] >= 0); + fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); + + /* + * First counter acts as the group leader: + */ + if (group && group_fd == -1) + group_fd = fd[i][counter]; + + event_array[nr_poll].fd = fd[i][counter]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[i][counter].counter = counter; + mmap_array[i][counter].prev = 0; + mmap_array[i][counter].mask = mmap_pages*page_size - 1; + mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ, MAP_SHARED, fd[i][counter], 0); + if (mmap_array[i][counter].base == MAP_FAILED) { + printf("kerneltop error: failed to mmap with %d (%s)\n", + errno, strerror(errno)); + exit(-1); + } + } + } + + if (pthread_create(&thread, NULL, display_thread, NULL)) { + printf("Could not create display thread.\n"); + exit(-1); + } + + if (realtime_prio) { + struct sched_param param; + + param.sched_priority = realtime_prio; + if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { + printf("Could not set realtime priority.\n"); + exit(-1); + } + } + + while (1) { + int hits = events; + + for (i = 0; i < nr_cpus; i++) { + for (counter = 0; counter < nr_counters; counter++) + mmap_read(&mmap_array[i][counter]); + } + + if (hits == events) + ret = poll(event_array, nr_poll, 100); + } + + return 0; +} diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h new file mode 100644 index 0000000..4163744 --- /dev/null +++ b/Documentation/perf_counter/builtin.h @@ -0,0 +1,18 @@ +#ifndef BUILTIN_H +#define BUILTIN_H + +#include "util.h" +#include "strbuf.h" + +extern const char perf_version_string[]; +extern const char perf_usage_string[]; +extern const char perf_more_info_string[]; + +extern void list_common_cmds_help(void); +extern const char *help_unknown_cmd(const char *cmd); +extern void prune_packed_objects(int); +extern int read_line_with_nul(char *buf, int size, FILE *file); +extern int check_pager_config(const char *cmd); + +extern int cmd_top(int argc, const char **argv, const char *prefix); +#endif diff --git a/Documentation/perf_counter/cache.h b/Documentation/perf_counter/cache.h new file mode 100644 index 0000000..dc08564 --- /dev/null +++ b/Documentation/perf_counter/cache.h @@ -0,0 +1,97 @@ +#ifndef CACHE_H +#define CACHE_H + +#include "util.h" +#include "strbuf.h" + +#define PERF_DIR_ENVIRONMENT "PERF_DIR" +#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" +#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" +#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY" +#define INDEX_ENVIRONMENT "PERF_INDEX_FILE" +#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE" +#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR" +#define CONFIG_ENVIRONMENT "PERF_CONFIG" +#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" +#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES" +#define PERFATTRIBUTES_FILE ".perfattributes" +#define INFOATTRIBUTES_FILE "info/attributes" +#define ATTRIBUTE_MACRO_PREFIX "[attr]" + +typedef int (*config_fn_t)(const char *, const char *, void *); +extern int perf_default_config(const char *, const char *, void *); +extern int perf_config_from_file(config_fn_t fn, const char *, void *); +extern int perf_config(config_fn_t fn, void *); +extern int perf_parse_ulong(const char *, unsigned long *); +extern int perf_config_int(const char *, const char *); +extern unsigned long perf_config_ulong(const char *, const char *); +extern int perf_config_bool_or_int(const char *, const char *, int *); +extern int perf_config_bool(const char *, const char *); +extern int perf_config_string(const char **, const char *, const char *); +extern int perf_config_set(const char *, const char *); +extern int perf_config_set_multivar(const char *, const char *, const char *, int); +extern int perf_config_rename_section(const char *, const char *); +extern const char *perf_etc_perfconfig(void); +extern int check_repository_format_version(const char *var, const char *value, void *cb); +extern int perf_config_system(void); +extern int perf_config_global(void); +extern int config_error_nonbool(const char *); +extern const char *config_exclusive_filename; + +#define MAX_PERFNAME (1000) +extern char perf_default_email[MAX_PERFNAME]; +extern char perf_default_name[MAX_PERFNAME]; +extern int user_ident_explicitly_given; + +extern const char *perf_log_output_encoding; +extern const char *perf_mailmap_file; + +/* IO helper functions */ +extern void maybe_flush_or_die(FILE *, const char *); +extern int copy_fd(int ifd, int ofd); +extern int copy_file(const char *dst, const char *src, int mode); +extern ssize_t read_in_full(int fd, void *buf, size_t count); +extern ssize_t write_in_full(int fd, const void *buf, size_t count); +extern void write_or_die(int fd, const void *buf, size_t count); +extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); +extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); +extern void fsync_or_die(int fd, const char *); + +/* pager.c */ +extern void setup_pager(void); +extern const char *pager_program; +extern int pager_in_use(void); +extern int pager_use_color; + +extern const char *editor_program; +extern const char *excludes_file; + +char *alias_lookup(const char *alias); +int split_cmdline(char *cmdline, const char ***argv); + +#define alloc_nr(x) (((x)+16)*3/2) + +/* + * Realloc the buffer pointed at by variable 'x' so that it can hold + * at least 'nr' entries; the number of entries currently allocated + * is 'alloc', using the standard growing factor alloc_nr() macro. + * + * DO NOT USE any expression with side-effect for 'x' or 'alloc'. + */ +#define ALLOC_GROW(x, nr, alloc) \ + do { \ + if ((nr) > alloc) { \ + if (alloc_nr(alloc) < (nr)) \ + alloc = (nr); \ + else \ + alloc = alloc_nr(alloc); \ + x = xrealloc((x), alloc * sizeof(*(x))); \ + } \ + } while(0) + + +static inline int is_absolute_path(const char *path) +{ + return path[0] == '/'; +} +#endif /* CACHE_H */ diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt new file mode 100644 index 0000000..1eab365 --- /dev/null +++ b/Documentation/perf_counter/command-list.txt @@ -0,0 +1,4 @@ +# List of known perf commands. +# command name category [deprecated] [common] +perf-top mainporcelain common + diff --git a/Documentation/perf_counter/config.c b/Documentation/perf_counter/config.c new file mode 100644 index 0000000..672d539 --- /dev/null +++ b/Documentation/perf_counter/config.c @@ -0,0 +1,966 @@ +/* + * GIT - The information manager from hell + * + * Copyright (C) Linus Torvalds, 2005 + * Copyright (C) Johannes Schindelin, 2005 + * + */ +#include "util.h" +#include "cache.h" +#include "exec_cmd.h" + +#define MAXNAME (256) + +static FILE *config_file; +static const char *config_file_name; +static int config_linenr; +static int config_file_eof; +static int zlib_compression_seen; + +const char *config_exclusive_filename = NULL; + +static int get_next_char(void) +{ + int c; + FILE *f; + + c = '\n'; + if ((f = config_file) != NULL) { + c = fgetc(f); + if (c == '\r') { + /* DOS like systems */ + c = fgetc(f); + if (c != '\n') { + ungetc(c, f); + c = '\r'; + } + } + if (c == '\n') + config_linenr++; + if (c == EOF) { + config_file_eof = 1; + c = '\n'; + } + } + return c; +} + +static char *parse_value(void) +{ + static char value[1024]; + int quote = 0, comment = 0, len = 0, space = 0; + + for (;;) { + int c = get_next_char(); + if (len >= sizeof(value) - 1) + return NULL; + if (c == '\n') { + if (quote) + return NULL; + value[len] = 0; + return value; + } + if (comment) + continue; + if (isspace(c) && !quote) { + space = 1; + continue; + } + if (!quote) { + if (c == ';' || c == '#') { + comment = 1; + continue; + } + } + if (space) { + if (len) + value[len++] = ' '; + space = 0; + } + if (c == '\\') { + c = get_next_char(); + switch (c) { + case '\n': + continue; + case 't': + c = '\t'; + break; + case 'b': + c = '\b'; + break; + case 'n': + c = '\n'; + break; + /* Some characters escape as themselves */ + case '\\': case '"': + break; + /* Reject unknown escape sequences */ + default: + return NULL; + } + value[len++] = c; + continue; + } + if (c == '"') { + quote = 1-quote; + continue; + } + value[len++] = c; + } +} + +static inline int iskeychar(int c) +{ + return isalnum(c) || c == '-'; +} + +static int get_value(config_fn_t fn, void *data, char *name, unsigned int len) +{ + int c; + char *value; + + /* Get the full name */ + for (;;) { + c = get_next_char(); + if (config_file_eof) + break; + if (!iskeychar(c)) + break; + name[len++] = tolower(c); + if (len >= MAXNAME) + return -1; + } + name[len] = 0; + while (c == ' ' || c == '\t') + c = get_next_char(); + + value = NULL; + if (c != '\n') { + if (c != '=') + return -1; + value = parse_value(); + if (!value) + return -1; + } + return fn(name, value, data); +} + +static int get_extended_base_var(char *name, int baselen, int c) +{ + do { + if (c == '\n') + return -1; + c = get_next_char(); + } while (isspace(c)); + + /* We require the format to be '[base "extension"]' */ + if (c != '"') + return -1; + name[baselen++] = '.'; + + for (;;) { + int c = get_next_char(); + if (c == '\n') + return -1; + if (c == '"') + break; + if (c == '\\') { + c = get_next_char(); + if (c == '\n') + return -1; + } + name[baselen++] = c; + if (baselen > MAXNAME / 2) + return -1; + } + + /* Final ']' */ + if (get_next_char() != ']') + return -1; + return baselen; +} + +static int get_base_var(char *name) +{ + int baselen = 0; + + for (;;) { + int c = get_next_char(); + if (config_file_eof) + return -1; + if (c == ']') + return baselen; + if (isspace(c)) + return get_extended_base_var(name, baselen, c); + if (!iskeychar(c) && c != '.') + return -1; + if (baselen > MAXNAME / 2) + return -1; + name[baselen++] = tolower(c); + } +} + +static int perf_parse_file(config_fn_t fn, void *data) +{ + int comment = 0; + int baselen = 0; + static char var[MAXNAME]; + + /* U+FEFF Byte Order Mark in UTF8 */ + static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; + const unsigned char *bomptr = utf8_bom; + + for (;;) { + int c = get_next_char(); + if (bomptr && *bomptr) { + /* We are at the file beginning; skip UTF8-encoded BOM + * if present. Sane editors won't put this in on their + * own, but e.g. Windows Notepad will do it happily. */ + if ((unsigned char) c == *bomptr) { + bomptr++; + continue; + } else { + /* Do not tolerate partial BOM. */ + if (bomptr != utf8_bom) + break; + /* No BOM at file beginning. Cool. */ + bomptr = NULL; + } + } + if (c == '\n') { + if (config_file_eof) + return 0; + comment = 0; + continue; + } + if (comment || isspace(c)) + continue; + if (c == '#' || c == ';') { + comment = 1; + continue; + } + if (c == '[') { + baselen = get_base_var(var); + if (baselen <= 0) + break; + var[baselen++] = '.'; + var[baselen] = 0; + continue; + } + if (!isalpha(c)) + break; + var[baselen] = tolower(c); + if (get_value(fn, data, var, baselen+1) < 0) + break; + } + die("bad config file line %d in %s", config_linenr, config_file_name); +} + +static int parse_unit_factor(const char *end, unsigned long *val) +{ + if (!*end) + return 1; + else if (!strcasecmp(end, "k")) { + *val *= 1024; + return 1; + } + else if (!strcasecmp(end, "m")) { + *val *= 1024 * 1024; + return 1; + } + else if (!strcasecmp(end, "g")) { + *val *= 1024 * 1024 * 1024; + return 1; + } + return 0; +} + +static int perf_parse_long(const char *value, long *ret) +{ + if (value && *value) { + char *end; + long val = strtol(value, &end, 0); + unsigned long factor = 1; + if (!parse_unit_factor(end, &factor)) + return 0; + *ret = val * factor; + return 1; + } + return 0; +} + +int perf_parse_ulong(const char *value, unsigned long *ret) +{ + if (value && *value) { + char *end; + unsigned long val = strtoul(value, &end, 0); + if (!parse_unit_factor(end, &val)) + return 0; + *ret = val; + return 1; + } + return 0; +} + +static void die_bad_config(const char *name) +{ + if (config_file_name) + die("bad config value for '%s' in %s", name, config_file_name); + die("bad config value for '%s'", name); +} + +int perf_config_int(const char *name, const char *value) +{ + long ret = 0; + if (!perf_parse_long(value, &ret)) + die_bad_config(name); + return ret; +} + +unsigned long perf_config_ulong(const char *name, const char *value) +{ + unsigned long ret; + if (!perf_parse_ulong(value, &ret)) + die_bad_config(name); + return ret; +} + +int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) +{ + *is_bool = 1; + if (!value) + return 1; + if (!*value) + return 0; + if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on")) + return 1; + if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off")) + return 0; + *is_bool = 0; + return perf_config_int(name, value); +} + +int perf_config_bool(const char *name, const char *value) +{ + int discard; + return !!perf_config_bool_or_int(name, value, &discard); +} + +int perf_config_string(const char **dest, const char *var, const char *value) +{ + if (!value) + return config_error_nonbool(var); + *dest = strdup(value); + return 0; +} + +static int perf_default_core_config(const char *var, const char *value) +{ + /* Add other config variables here and to Documentation/config.txt. */ + return 0; +} + +int perf_default_config(const char *var, const char *value, void *dummy) +{ + if (!prefixcmp(var, "core.")) + return perf_default_core_config(var, value); + + /* Add other config variables here and to Documentation/config.txt. */ + return 0; +} + +int perf_config_from_file(config_fn_t fn, const char *filename, void *data) +{ + int ret; + FILE *f = fopen(filename, "r"); + + ret = -1; + if (f) { + config_file = f; + config_file_name = filename; + config_linenr = 1; + config_file_eof = 0; + ret = perf_parse_file(fn, data); + fclose(f); + config_file_name = NULL; + } + return ret; +} + +const char *perf_etc_perfconfig(void) +{ + static const char *system_wide; + if (!system_wide) + system_wide = system_path(ETC_PERFCONFIG); + return system_wide; +} + +static int perf_env_bool(const char *k, int def) +{ + const char *v = getenv(k); + return v ? perf_config_bool(k, v) : def; +} + +int perf_config_system(void) +{ + return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0); +} + +int perf_config_global(void) +{ + return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); +} + +int perf_config(config_fn_t fn, void *data) +{ + int ret = 0, found = 0; + char *repo_config = NULL; + const char *home = NULL; + + /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ + if (config_exclusive_filename) + return perf_config_from_file(fn, config_exclusive_filename, data); + if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { + ret += perf_config_from_file(fn, perf_etc_perfconfig(), + data); + found += 1; + } + + home = getenv("HOME"); + if (perf_config_global() && home) { + char *user_config = strdup(mkpath("%s/.perfconfig", home)); + if (!access(user_config, R_OK)) { + ret += perf_config_from_file(fn, user_config, data); + found += 1; + } + free(user_config); + } + + repo_config = perf_pathdup("config"); + if (!access(repo_config, R_OK)) { + ret += perf_config_from_file(fn, repo_config, data); + found += 1; + } + free(repo_config); + if (found == 0) + return -1; + return ret; +} + +/* + * Find all the stuff for perf_config_set() below. + */ + +#define MAX_MATCHES 512 + +static struct { + int baselen; + char* key; + int do_not_match; + regex_t* value_regex; + int multi_replace; + size_t offset[MAX_MATCHES]; + enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; + int seen; +} store; + +static int matches(const char* key, const char* value) +{ + return !strcmp(key, store.key) && + (store.value_regex == NULL || + (store.do_not_match ^ + !regexec(store.value_regex, value, 0, NULL, 0))); +} + +static int store_aux(const char* key, const char* value, void *cb) +{ + const char *ep; + size_t section_len; + + switch (store.state) { + case KEY_SEEN: + if (matches(key, value)) { + if (store.seen == 1 && store.multi_replace == 0) { + warning("%s has multiple values", key); + } else if (store.seen >= MAX_MATCHES) { + error("too many matches for %s", key); + return 1; + } + + store.offset[store.seen] = ftell(config_file); + store.seen++; + } + break; + case SECTION_SEEN: + /* + * What we are looking for is in store.key (both + * section and var), and its section part is baselen + * long. We found key (again, both section and var). + * We would want to know if this key is in the same + * section as what we are looking for. We already + * know we are in the same section as what should + * hold store.key. + */ + ep = strrchr(key, '.'); + section_len = ep - key; + + if ((section_len != store.baselen) || + memcmp(key, store.key, section_len+1)) { + store.state = SECTION_END_SEEN; + break; + } + + /* + * Do not increment matches: this is no match, but we + * just made sure we are in the desired section. + */ + store.offset[store.seen] = ftell(config_file); + /* fallthru */ + case SECTION_END_SEEN: + case START: + if (matches(key, value)) { + store.offset[store.seen] = ftell(config_file); + store.state = KEY_SEEN; + store.seen++; + } else { + if (strrchr(key, '.') - key == store.baselen && + !strncmp(key, store.key, store.baselen)) { + store.state = SECTION_SEEN; + store.offset[store.seen] = ftell(config_file); + } + } + } + return 0; +} + +static int write_error(const char *filename) +{ + error("failed to write new configuration file %s", filename); + + /* Same error code as "failed to rename". */ + return 4; +} + +static int store_write_section(int fd, const char* key) +{ + const char *dot; + int i, success; + struct strbuf sb = STRBUF_INIT; + + dot = memchr(key, '.', store.baselen); + if (dot) { + strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key); + for (i = dot - key + 1; i < store.baselen; i++) { + if (key[i] == '"' || key[i] == '\\') + strbuf_addch(&sb, '\\'); + strbuf_addch(&sb, key[i]); + } + strbuf_addstr(&sb, "\"]\n"); + } else { + strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); + } + + success = write_in_full(fd, sb.buf, sb.len) == sb.len; + strbuf_release(&sb); + + return success; +} + +static int store_write_pair(int fd, const char* key, const char* value) +{ + int i, success; + int length = strlen(key + store.baselen + 1); + const char *quote = ""; + struct strbuf sb = STRBUF_INIT; + + /* + * Check to see if the value needs to be surrounded with a dq pair. + * Note that problematic characters are always backslash-quoted; this + * check is about not losing leading or trailing SP and strings that + * follow beginning-of-comment characters (i.e. ';' and '#') by the + * configuration parser. + */ + if (value[0] == ' ') + quote = "\""; + for (i = 0; value[i]; i++) + if (value[i] == ';' || value[i] == '#') + quote = "\""; + if (i && value[i - 1] == ' ') + quote = "\""; + + strbuf_addf(&sb, "\t%.*s = %s", + length, key + store.baselen + 1, quote); + + for (i = 0; value[i]; i++) + switch (value[i]) { + case '\n': + strbuf_addstr(&sb, "\\n"); + break; + case '\t': + strbuf_addstr(&sb, "\\t"); + break; + case '"': + case '\\': + strbuf_addch(&sb, '\\'); + default: + strbuf_addch(&sb, value[i]); + break; + } + strbuf_addf(&sb, "%s\n", quote); + + success = write_in_full(fd, sb.buf, sb.len) == sb.len; + strbuf_release(&sb); + + return success; +} + +static ssize_t find_beginning_of_line(const char* contents, size_t size, + size_t offset_, int* found_bracket) +{ + size_t equal_offset = size, bracket_offset = size; + ssize_t offset; + +contline: + for (offset = offset_-2; offset > 0 + && contents[offset] != '\n'; offset--) + switch (contents[offset]) { + case '=': equal_offset = offset; break; + case ']': bracket_offset = offset; break; + } + if (offset > 0 && contents[offset-1] == '\\') { + offset_ = offset; + goto contline; + } + if (bracket_offset < equal_offset) { + *found_bracket = 1; + offset = bracket_offset+1; + } else + offset++; + + return offset; +} + +int perf_config_set(const char* key, const char* value) +{ + return perf_config_set_multivar(key, value, NULL, 0); +} + +/* + * If value==NULL, unset in (remove from) config, + * if value_regex!=NULL, disregard key/value pairs where value does not match. + * if multi_replace==0, nothing, or only one matching key/value is replaced, + * else all matching key/values (regardless how many) are removed, + * before the new pair is written. + * + * Returns 0 on success. + * + * This function does this: + * + * - it locks the config file by creating ".perf/config.lock" + * + * - it then parses the config using store_aux() as validator to find + * the position on the key/value pair to replace. If it is to be unset, + * it must be found exactly once. + * + * - the config file is mmap()ed and the part before the match (if any) is + * written to the lock file, then the changed part and the rest. + * + * - the config file is removed and the lock file rename()d to it. + * + */ +int perf_config_set_multivar(const char* key, const char* value, + const char* value_regex, int multi_replace) +{ + int i, dot; + int fd = -1, in_fd; + int ret; + char* config_filename; + const char* last_dot = strrchr(key, '.'); + + if (config_exclusive_filename) + config_filename = strdup(config_exclusive_filename); + else + config_filename = perf_pathdup("config"); + + /* + * Since "key" actually contains the section name and the real + * key name separated by a dot, we have to know where the dot is. + */ + + if (last_dot == NULL) { + error("key does not contain a section: %s", key); + ret = 2; + goto out_free; + } + store.baselen = last_dot - key; + + store.multi_replace = multi_replace; + + /* + * Validate the key and while at it, lower case it for matching. + */ + store.key = malloc(strlen(key) + 1); + dot = 0; + for (i = 0; key[i]; i++) { + unsigned char c = key[i]; + if (c == '.') + dot = 1; + /* Leave the extended basename untouched.. */ + if (!dot || i > store.baselen) { + if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) { + error("invalid key: %s", key); + free(store.key); + ret = 1; + goto out_free; + } + c = tolower(c); + } else if (c == '\n') { + error("invalid key (newline): %s", key); + free(store.key); + ret = 1; + goto out_free; + } + store.key[i] = c; + } + store.key[i] = 0; + + /* + * If .perf/config does not exist yet, write a minimal version. + */ + in_fd = open(config_filename, O_RDONLY); + if ( in_fd < 0 ) { + free(store.key); + + if ( ENOENT != errno ) { + error("opening %s: %s", config_filename, + strerror(errno)); + ret = 3; /* same as "invalid config file" */ + goto out_free; + } + /* if nothing to unset, error out */ + if (value == NULL) { + ret = 5; + goto out_free; + } + + store.key = (char*)key; + if (!store_write_section(fd, key) || + !store_write_pair(fd, key, value)) + goto write_err_out; + } else { + struct stat st; + char* contents; + size_t contents_sz, copy_begin, copy_end; + int i, new_line = 0; + + if (value_regex == NULL) + store.value_regex = NULL; + else { + if (value_regex[0] == '!') { + store.do_not_match = 1; + value_regex++; + } else + store.do_not_match = 0; + + store.value_regex = (regex_t*)malloc(sizeof(regex_t)); + if (regcomp(store.value_regex, value_regex, + REG_EXTENDED)) { + error("invalid pattern: %s", value_regex); + free(store.value_regex); + ret = 6; + goto out_free; + } + } + + store.offset[0] = 0; + store.state = START; + store.seen = 0; + + /* + * After this, store.offset will contain the *end* offset + * of the last match, or remain at 0 if no match was found. + * As a side effect, we make sure to transform only a valid + * existing config file. + */ + if (perf_config_from_file(store_aux, config_filename, NULL)) { + error("invalid config file %s", config_filename); + free(store.key); + if (store.value_regex != NULL) { + regfree(store.value_regex); + free(store.value_regex); + } + ret = 3; + goto out_free; + } + + free(store.key); + if (store.value_regex != NULL) { + regfree(store.value_regex); + free(store.value_regex); + } + + /* if nothing to unset, or too many matches, error out */ + if ((store.seen == 0 && value == NULL) || + (store.seen > 1 && multi_replace == 0)) { + ret = 5; + goto out_free; + } + + fstat(in_fd, &st); + contents_sz = xsize_t(st.st_size); + contents = mmap(NULL, contents_sz, PROT_READ, + MAP_PRIVATE, in_fd, 0); + close(in_fd); + + if (store.seen == 0) + store.seen = 1; + + for (i = 0, copy_begin = 0; i < store.seen; i++) { + if (store.offset[i] == 0) { + store.offset[i] = copy_end = contents_sz; + } else if (store.state != KEY_SEEN) { + copy_end = store.offset[i]; + } else + copy_end = find_beginning_of_line( + contents, contents_sz, + store.offset[i]-2, &new_line); + + if (copy_end > 0 && contents[copy_end-1] != '\n') + new_line = 1; + + /* write the first part of the config */ + if (copy_end > copy_begin) { + if (write_in_full(fd, contents + copy_begin, + copy_end - copy_begin) < + copy_end - copy_begin) + goto write_err_out; + if (new_line && + write_in_full(fd, "\n", 1) != 1) + goto write_err_out; + } + copy_begin = store.offset[i]; + } + + /* write the pair (value == NULL means unset) */ + if (value != NULL) { + if (store.state == START) { + if (!store_write_section(fd, key)) + goto write_err_out; + } + if (!store_write_pair(fd, key, value)) + goto write_err_out; + } + + /* write the rest of the config */ + if (copy_begin < contents_sz) + if (write_in_full(fd, contents + copy_begin, + contents_sz - copy_begin) < + contents_sz - copy_begin) + goto write_err_out; + + munmap(contents, contents_sz); + } + + ret = 0; + +out_free: + free(config_filename); + return ret; + +write_err_out: + goto out_free; + +} + +static int section_name_match (const char *buf, const char *name) +{ + int i = 0, j = 0, dot = 0; + for (; buf[i] && buf[i] != ']'; i++) { + if (!dot && isspace(buf[i])) { + dot = 1; + if (name[j++] != '.') + break; + for (i++; isspace(buf[i]); i++) + ; /* do nothing */ + if (buf[i] != '"') + break; + continue; + } + if (buf[i] == '\\' && dot) + i++; + else if (buf[i] == '"' && dot) { + for (i++; isspace(buf[i]); i++) + ; /* do_nothing */ + break; + } + if (buf[i] != name[j++]) + break; + } + return (buf[i] == ']' && name[j] == 0); +} + +/* if new_name == NULL, the section is removed instead */ +int perf_config_rename_section(const char *old_name, const char *new_name) +{ + int ret = 0, remove = 0; + char *config_filename; + int out_fd; + char buf[1024]; + + if (config_exclusive_filename) + config_filename = strdup(config_exclusive_filename); + else + config_filename = perf_pathdup("config"); + if (out_fd < 0) { + ret = error("could not lock config file %s", config_filename); + goto out; + } + + if (!(config_file = fopen(config_filename, "rb"))) { + /* no config file means nothing to rename, no error */ + goto unlock_and_out; + } + + while (fgets(buf, sizeof(buf), config_file)) { + int i; + int length; + for (i = 0; buf[i] && isspace(buf[i]); i++) + ; /* do nothing */ + if (buf[i] == '[') { + /* it's a section */ + if (section_name_match (&buf[i+1], old_name)) { + ret++; + if (new_name == NULL) { + remove = 1; + continue; + } + store.baselen = strlen(new_name); + if (!store_write_section(out_fd, new_name)) { + goto out; + } + continue; + } + remove = 0; + } + if (remove) + continue; + length = strlen(buf); + if (write_in_full(out_fd, buf, length) != length) { + goto out; + } + } + fclose(config_file); + unlock_and_out: + out: + free(config_filename); + return ret; +} + +/* + * Call this to report error for your variable that should not + * get a boolean value (i.e. "[my] var" means "true"). + */ +int config_error_nonbool(const char *var) +{ + return error("Missing value for '%s'", var); +} diff --git a/Documentation/perf_counter/ctype.c b/Documentation/perf_counter/ctype.c new file mode 100644 index 0000000..b90ec00 --- /dev/null +++ b/Documentation/perf_counter/ctype.c @@ -0,0 +1,26 @@ +/* + * Sane locale-independent, ASCII ctype. + * + * No surprises, and works with signed and unsigned chars. + */ +#include "cache.h" + +enum { + S = GIT_SPACE, + A = GIT_ALPHA, + D = GIT_DIGIT, + G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ + R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ +}; + +unsigned char sane_ctype[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ + S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ + D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ + A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ + /* Nothing in the 128.. range */ +}; diff --git a/Documentation/perf_counter/exec_cmd.c b/Documentation/perf_counter/exec_cmd.c new file mode 100644 index 0000000..d392922 --- /dev/null +++ b/Documentation/perf_counter/exec_cmd.c @@ -0,0 +1,165 @@ +#include "cache.h" +#include "exec_cmd.h" +#include "quote.h" +#define MAX_ARGS 32 + +extern char **environ; +static const char *argv_exec_path; +static const char *argv0_path; + +const char *system_path(const char *path) +{ +#ifdef RUNTIME_PREFIX + static const char *prefix; +#else + static const char *prefix = PREFIX; +#endif + struct strbuf d = STRBUF_INIT; + + if (is_absolute_path(path)) + return path; + +#ifdef RUNTIME_PREFIX + assert(argv0_path); + assert(is_absolute_path(argv0_path)); + + if (!prefix && + !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) && + !(prefix = strip_path_suffix(argv0_path, BINDIR)) && + !(prefix = strip_path_suffix(argv0_path, "perf"))) { + prefix = PREFIX; + fprintf(stderr, "RUNTIME_PREFIX requested, " + "but prefix computation failed. " + "Using static fallback '%s'.\n", prefix); + } +#endif + + strbuf_addf(&d, "%s/%s", prefix, path); + path = strbuf_detach(&d, NULL); + return path; +} + +const char *perf_extract_argv0_path(const char *argv0) +{ + const char *slash; + + if (!argv0 || !*argv0) + return NULL; + slash = argv0 + strlen(argv0); + + while (argv0 <= slash && !is_dir_sep(*slash)) + slash--; + + if (slash >= argv0) { + argv0_path = strndup(argv0, slash - argv0); + return slash + 1; + } + + return argv0; +} + +void perf_set_argv_exec_path(const char *exec_path) +{ + argv_exec_path = exec_path; + /* + * Propagate this setting to external programs. + */ + setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1); +} + + +/* Returns the highest-priority, location to look for perf programs. */ +const char *perf_exec_path(void) +{ + const char *env; + + if (argv_exec_path) + return argv_exec_path; + + env = getenv(EXEC_PATH_ENVIRONMENT); + if (env && *env) { + return env; + } + + return system_path(PERF_EXEC_PATH); +} + +static void add_path(struct strbuf *out, const char *path) +{ + if (path && *path) { + if (is_absolute_path(path)) + strbuf_addstr(out, path); + else + strbuf_addstr(out, make_nonrelative_path(path)); + + strbuf_addch(out, PATH_SEP); + } +} + +void setup_path(void) +{ + const char *old_path = getenv("PATH"); + struct strbuf new_path = STRBUF_INIT; + + add_path(&new_path, perf_exec_path()); + add_path(&new_path, argv0_path); + + if (old_path) + strbuf_addstr(&new_path, old_path); + else + strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); + + setenv("PATH", new_path.buf, 1); + + strbuf_release(&new_path); +} + +const char **prepare_perf_cmd(const char **argv) +{ + int argc; + const char **nargv; + + for (argc = 0; argv[argc]; argc++) + ; /* just counting */ + nargv = malloc(sizeof(*nargv) * (argc + 2)); + + nargv[0] = "perf"; + for (argc = 0; argv[argc]; argc++) + nargv[argc + 1] = argv[argc]; + nargv[argc + 1] = NULL; + return nargv; +} + +int execv_perf_cmd(const char **argv) { + const char **nargv = prepare_perf_cmd(argv); + + /* execvp() can only ever return if it fails */ + execvp("perf", (char **)nargv); + + free(nargv); + return -1; +} + + +int execl_perf_cmd(const char *cmd,...) +{ + int argc; + const char *argv[MAX_ARGS + 1]; + const char *arg; + va_list param; + + va_start(param, cmd); + argv[0] = cmd; + argc = 1; + while (argc < MAX_ARGS) { + arg = argv[argc++] = va_arg(param, char *); + if (!arg) + break; + } + va_end(param); + if (MAX_ARGS <= argc) + return error("too many args to run %s", cmd); + + argv[argc] = NULL; + return execv_perf_cmd(argv); +} diff --git a/Documentation/perf_counter/exec_cmd.h b/Documentation/perf_counter/exec_cmd.h new file mode 100644 index 0000000..effe25e --- /dev/null +++ b/Documentation/perf_counter/exec_cmd.h @@ -0,0 +1,13 @@ +#ifndef PERF_EXEC_CMD_H +#define PERF_EXEC_CMD_H + +extern void perf_set_argv_exec_path(const char *exec_path); +extern const char *perf_extract_argv0_path(const char *path); +extern const char *perf_exec_path(void); +extern void setup_path(void); +extern const char **prepare_perf_cmd(const char **argv); +extern int execv_perf_cmd(const char **argv); /* NULL terminated */ +extern int execl_perf_cmd(const char *cmd, ...); +extern const char *system_path(const char *path); + +#endif /* PERF_EXEC_CMD_H */ diff --git a/Documentation/perf_counter/generate-cmdlist.sh b/Documentation/perf_counter/generate-cmdlist.sh new file mode 100755 index 0000000..75c68d9 --- /dev/null +++ b/Documentation/perf_counter/generate-cmdlist.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +echo "/* Automatically generated by $0 */ +struct cmdname_help +{ + char name[16]; + char help[80]; +}; + +static struct cmdname_help common_cmds[] = {" + +sed -n -e 's/^git-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/git-'"$cmd"'/H + ${ + x + s/.*git-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/git-$cmd.txt" +done +echo "};" diff --git a/Documentation/perf_counter/help.c b/Documentation/perf_counter/help.c new file mode 100644 index 0000000..ec01167 --- /dev/null +++ b/Documentation/perf_counter/help.c @@ -0,0 +1,366 @@ +#include "cache.h" +#include "builtin.h" +#include "exec_cmd.h" +#include "levenshtein.h" +#include "help.h" + +/* most GUI terminals set COLUMNS (although some don't export it) */ +static int term_columns(void) +{ + char *col_string = getenv("COLUMNS"); + int n_cols; + + if (col_string && (n_cols = atoi(col_string)) > 0) + return n_cols; + +#ifdef TIOCGWINSZ + { + struct winsize ws; + if (!ioctl(1, TIOCGWINSZ, &ws)) { + if (ws.ws_col) + return ws.ws_col; + } + } +#endif + + return 80; +} + +void add_cmdname(struct cmdnames *cmds, const char *name, int len) +{ + struct cmdname *ent = malloc(sizeof(*ent) + len + 1); + + ent->len = len; + memcpy(ent->name, name, len); + ent->name[len] = 0; + + ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); + cmds->names[cmds->cnt++] = ent; +} + +static void clean_cmdnames(struct cmdnames *cmds) +{ + int i; + for (i = 0; i < cmds->cnt; ++i) + free(cmds->names[i]); + free(cmds->names); + cmds->cnt = 0; + cmds->alloc = 0; +} + +static int cmdname_compare(const void *a_, const void *b_) +{ + struct cmdname *a = *(struct cmdname **)a_; + struct cmdname *b = *(struct cmdname **)b_; + return strcmp(a->name, b->name); +} + +static void uniq(struct cmdnames *cmds) +{ + int i, j; + + if (!cmds->cnt) + return; + + for (i = j = 1; i < cmds->cnt; i++) + if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + cmds->names[j++] = cmds->names[i]; + + cmds->cnt = j; +} + +void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) +{ + int ci, cj, ei; + int cmp; + + ci = cj = ei = 0; + while (ci < cmds->cnt && ei < excludes->cnt) { + cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); + if (cmp < 0) + cmds->names[cj++] = cmds->names[ci++]; + else if (cmp == 0) + ci++, ei++; + else if (cmp > 0) + ei++; + } + + while (ci < cmds->cnt) + cmds->names[cj++] = cmds->names[ci++]; + + cmds->cnt = cj; +} + +static void pretty_print_string_list(struct cmdnames *cmds, int longest) +{ + int cols = 1, rows; + int space = longest + 1; /* min 1 SP between words */ + int max_cols = term_columns() - 1; /* don't print *on* the edge */ + int i, j; + + if (space < max_cols) + cols = max_cols / space; + rows = (cmds->cnt + cols - 1) / cols; + + for (i = 0; i < rows; i++) { + printf(" "); + + for (j = 0; j < cols; j++) { + int n = j * rows + i; + int size = space; + if (n >= cmds->cnt) + break; + if (j == cols-1 || n + rows >= cmds->cnt) + size = 1; + printf("%-*s", size, cmds->names[n]->name); + } + putchar('\n'); + } +} + +static int is_executable(const char *name) +{ + struct stat st; + + if (stat(name, &st) || /* stat, not lstat */ + !S_ISREG(st.st_mode)) + return 0; + +#ifdef __MINGW32__ + /* cannot trust the executable bit, peek into the file instead */ + char buf[3] = { 0 }; + int n; + int fd = open(name, O_RDONLY); + st.st_mode &= ~S_IXUSR; + if (fd >= 0) { + n = read(fd, buf, 2); + if (n == 2) + /* DOS executables start with "MZ" */ + if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) + st.st_mode |= S_IXUSR; + close(fd); + } +#endif + return st.st_mode & S_IXUSR; +} + +static void list_commands_in_dir(struct cmdnames *cmds, + const char *path, + const char *prefix) +{ + int prefix_len; + DIR *dir = opendir(path); + struct dirent *de; + struct strbuf buf = STRBUF_INIT; + int len; + + if (!dir) + return; + if (!prefix) + prefix = "perf-"; + prefix_len = strlen(prefix); + + strbuf_addf(&buf, "%s/", path); + len = buf.len; + + while ((de = readdir(dir)) != NULL) { + int entlen; + + if (prefixcmp(de->d_name, prefix)) + continue; + + strbuf_setlen(&buf, len); + strbuf_addstr(&buf, de->d_name); + if (!is_executable(buf.buf)) + continue; + + entlen = strlen(de->d_name) - prefix_len; + if (has_extension(de->d_name, ".exe")) + entlen -= 4; + + add_cmdname(cmds, de->d_name + prefix_len, entlen); + } + closedir(dir); + strbuf_release(&buf); +} + +void load_command_list(const char *prefix, + struct cmdnames *main_cmds, + struct cmdnames *other_cmds) +{ + const char *env_path = getenv("PATH"); + const char *exec_path = perf_exec_path(); + + if (exec_path) { + list_commands_in_dir(main_cmds, exec_path, prefix); + qsort(main_cmds->names, main_cmds->cnt, + sizeof(*main_cmds->names), cmdname_compare); + uniq(main_cmds); + } + + if (env_path) { + char *paths, *path, *colon; + path = paths = strdup(env_path); + while (1) { + if ((colon = strchr(path, PATH_SEP))) + *colon = 0; + if (!exec_path || strcmp(path, exec_path)) + list_commands_in_dir(other_cmds, path, prefix); + + if (!colon) + break; + path = colon + 1; + } + free(paths); + + qsort(other_cmds->names, other_cmds->cnt, + sizeof(*other_cmds->names), cmdname_compare); + uniq(other_cmds); + } + exclude_cmds(other_cmds, main_cmds); +} + +void list_commands(const char *title, struct cmdnames *main_cmds, + struct cmdnames *other_cmds) +{ + int i, longest = 0; + + for (i = 0; i < main_cmds->cnt; i++) + if (longest < main_cmds->names[i]->len) + longest = main_cmds->names[i]->len; + for (i = 0; i < other_cmds->cnt; i++) + if (longest < other_cmds->names[i]->len) + longest = other_cmds->names[i]->len; + + if (main_cmds->cnt) { + const char *exec_path = perf_exec_path(); + printf("available %s in '%s'\n", title, exec_path); + printf("----------------"); + mput_char('-', strlen(title) + strlen(exec_path)); + putchar('\n'); + pretty_print_string_list(main_cmds, longest); + putchar('\n'); + } + + if (other_cmds->cnt) { + printf("%s available from elsewhere on your $PATH\n", title); + printf("---------------------------------------"); + mput_char('-', strlen(title)); + putchar('\n'); + pretty_print_string_list(other_cmds, longest); + putchar('\n'); + } +} + +int is_in_cmdlist(struct cmdnames *c, const char *s) +{ + int i; + for (i = 0; i < c->cnt; i++) + if (!strcmp(s, c->names[i]->name)) + return 1; + return 0; +} + +static int autocorrect; +static struct cmdnames aliases; + +static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) +{ + if (!strcmp(var, "help.autocorrect")) + autocorrect = perf_config_int(var,value); + /* Also use aliases for command lookup */ + if (!prefixcmp(var, "alias.")) + add_cmdname(&aliases, var + 6, strlen(var + 6)); + + return perf_default_config(var, value, cb); +} + +static int levenshtein_compare(const void *p1, const void *p2) +{ + const struct cmdname *const *c1 = p1, *const *c2 = p2; + const char *s1 = (*c1)->name, *s2 = (*c2)->name; + int l1 = (*c1)->len; + int l2 = (*c2)->len; + return l1 != l2 ? l1 - l2 : strcmp(s1, s2); +} + +static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) +{ + int i; + ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); + + for (i = 0; i < old->cnt; i++) + cmds->names[cmds->cnt++] = old->names[i]; + free(old->names); + old->cnt = 0; + old->names = NULL; +} + +const char *help_unknown_cmd(const char *cmd) +{ + int i, n, best_similarity = 0; + struct cmdnames main_cmds, other_cmds; + + memset(&main_cmds, 0, sizeof(main_cmds)); + memset(&other_cmds, 0, sizeof(main_cmds)); + memset(&aliases, 0, sizeof(aliases)); + + perf_config(perf_unknown_cmd_config, NULL); + + load_command_list("perf-", &main_cmds, &other_cmds); + + add_cmd_list(&main_cmds, &aliases); + add_cmd_list(&main_cmds, &other_cmds); + qsort(main_cmds.names, main_cmds.cnt, + sizeof(main_cmds.names), cmdname_compare); + uniq(&main_cmds); + + /* This reuses cmdname->len for similarity index */ + for (i = 0; i < main_cmds.cnt; ++i) + main_cmds.names[i]->len = + levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); + + qsort(main_cmds.names, main_cmds.cnt, + sizeof(*main_cmds.names), levenshtein_compare); + + if (!main_cmds.cnt) + die ("Uh oh. Your system reports no Git commands at all."); + + best_similarity = main_cmds.names[0]->len; + n = 1; + while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) + ++n; + if (autocorrect && n == 1) { + const char *assumed = main_cmds.names[0]->name; + main_cmds.names[0] = NULL; + clean_cmdnames(&main_cmds); + fprintf(stderr, "WARNING: You called a Git program named '%s', " + "which does not exist.\n" + "Continuing under the assumption that you meant '%s'\n", + cmd, assumed); + if (autocorrect > 0) { + fprintf(stderr, "in %0.1f seconds automatically...\n", + (float)autocorrect/10.0); + poll(NULL, 0, autocorrect * 100); + } + return assumed; + } + + fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); + + if (best_similarity < 6) { + fprintf(stderr, "\nDid you mean %s?\n", + n < 2 ? "this": "one of these"); + + for (i = 0; i < n; i++) + fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); + } + + exit(1); +} + +int cmd_version(int argc, const char **argv, const char *prefix) +{ + printf("perf version %s\n", perf_version_string); + return 0; +} diff --git a/Documentation/perf_counter/help.h b/Documentation/perf_counter/help.h new file mode 100644 index 0000000..56bc154 --- /dev/null +++ b/Documentation/perf_counter/help.h @@ -0,0 +1,29 @@ +#ifndef HELP_H +#define HELP_H + +struct cmdnames { + int alloc; + int cnt; + struct cmdname { + size_t len; /* also used for similarity index in help.c */ + char name[FLEX_ARRAY]; + } **names; +}; + +static inline void mput_char(char c, unsigned int num) +{ + while(num--) + putchar(c); +} + +void load_command_list(const char *prefix, + struct cmdnames *main_cmds, + struct cmdnames *other_cmds); +void add_cmdname(struct cmdnames *cmds, const char *name, int len); +/* Here we require that excludes is a sorted list. */ +void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); +int is_in_cmdlist(struct cmdnames *c, const char *s); +void list_commands(const char *title, struct cmdnames *main_cmds, + struct cmdnames *other_cmds); + +#endif /* HELP_H */ diff --git a/Documentation/perf_counter/levenshtein.c b/Documentation/perf_counter/levenshtein.c new file mode 100644 index 0000000..e521d15 --- /dev/null +++ b/Documentation/perf_counter/levenshtein.c @@ -0,0 +1,84 @@ +#include "cache.h" +#include "levenshtein.h" + +/* + * This function implements the Damerau-Levenshtein algorithm to + * calculate a distance between strings. + * + * Basically, it says how many letters need to be swapped, substituted, + * deleted from, or added to string1, at least, to get string2. + * + * The idea is to build a distance matrix for the substrings of both + * strings. To avoid a large space complexity, only the last three rows + * are kept in memory (if swaps had the same or higher cost as one deletion + * plus one insertion, only two rows would be needed). + * + * At any stage, "i + 1" denotes the length of the current substring of + * string1 that the distance is calculated for. + * + * row2 holds the current row, row1 the previous row (i.e. for the substring + * of string1 of length "i"), and row0 the row before that. + * + * In other words, at the start of the big loop, row2[j + 1] contains the + * Damerau-Levenshtein distance between the substring of string1 of length + * "i" and the substring of string2 of length "j + 1". + * + * All the big loop does is determine the partial minimum-cost paths. + * + * It does so by calculating the costs of the path ending in characters + * i (in string1) and j (in string2), respectively, given that the last + * operation is a substition, a swap, a deletion, or an insertion. + * + * This implementation allows the costs to be weighted: + * + * - w (as in "sWap") + * - s (as in "Substitution") + * - a (for insertion, AKA "Add") + * - d (as in "Deletion") + * + * Note that this algorithm calculates a distance _iff_ d == a. + */ +int levenshtein(const char *string1, const char *string2, + int w, int s, int a, int d) +{ + int len1 = strlen(string1), len2 = strlen(string2); + int *row0 = malloc(sizeof(int) * (len2 + 1)); + int *row1 = malloc(sizeof(int) * (len2 + 1)); + int *row2 = malloc(sizeof(int) * (len2 + 1)); + int i, j; + + for (j = 0; j <= len2; j++) + row1[j] = j * a; + for (i = 0; i < len1; i++) { + int *dummy; + + row2[0] = (i + 1) * d; + for (j = 0; j < len2; j++) { + /* substitution */ + row2[j + 1] = row1[j] + s * (string1[i] != string2[j]); + /* swap */ + if (i > 0 && j > 0 && string1[i - 1] == string2[j] && + string1[i] == string2[j - 1] && + row2[j + 1] > row0[j - 1] + w) + row2[j + 1] = row0[j - 1] + w; + /* deletion */ + if (row2[j + 1] > row1[j + 1] + d) + row2[j + 1] = row1[j + 1] + d; + /* insertion */ + if (row2[j + 1] > row2[j] + a) + row2[j + 1] = row2[j] + a; + } + + dummy = row0; + row0 = row1; + row1 = row2; + row2 = dummy; + } + + i = row1[len2]; + free(row0); + free(row1); + free(row2); + + return i; +} diff --git a/Documentation/perf_counter/levenshtein.h b/Documentation/perf_counter/levenshtein.h new file mode 100644 index 0000000..0173abe --- /dev/null +++ b/Documentation/perf_counter/levenshtein.h @@ -0,0 +1,8 @@ +#ifndef LEVENSHTEIN_H +#define LEVENSHTEIN_H + +int levenshtein(const char *string1, const char *string2, + int swap_penalty, int substition_penalty, + int insertion_penalty, int deletion_penalty); + +#endif diff --git a/Documentation/perf_counter/parse-options.c b/Documentation/perf_counter/parse-options.c new file mode 100644 index 0000000..7464f34 --- /dev/null +++ b/Documentation/perf_counter/parse-options.c @@ -0,0 +1,495 @@ +#include "util.h" +#include "parse-options.h" +#include "cache.h" + +#define OPT_SHORT 1 +#define OPT_UNSET 2 + +static int opterror(const struct option *opt, const char *reason, int flags) +{ + if (flags & OPT_SHORT) + return error("switch `%c' %s", opt->short_name, reason); + if (flags & OPT_UNSET) + return error("option `no-%s' %s", opt->long_name, reason); + return error("option `%s' %s", opt->long_name, reason); +} + +static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, + int flags, const char **arg) +{ + if (p->opt) { + *arg = p->opt; + p->opt = NULL; + } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { + *arg = (const char *)opt->defval; + } else if (p->argc > 1) { + p->argc--; + *arg = *++p->argv; + } else + return opterror(opt, "requires a value", flags); + return 0; +} + +static int get_value(struct parse_opt_ctx_t *p, + const struct option *opt, int flags) +{ + const char *s, *arg; + const int unset = flags & OPT_UNSET; + + if (unset && p->opt) + return opterror(opt, "takes no value", flags); + if (unset && (opt->flags & PARSE_OPT_NONEG)) + return opterror(opt, "isn't available", flags); + + if (!(flags & OPT_SHORT) && p->opt) { + switch (opt->type) { + case OPTION_CALLBACK: + if (!(opt->flags & PARSE_OPT_NOARG)) + break; + /* FALLTHROUGH */ + case OPTION_BOOLEAN: + case OPTION_BIT: + case OPTION_SET_INT: + case OPTION_SET_PTR: + return opterror(opt, "takes no value", flags); + default: + break; + } + } + + switch (opt->type) { + case OPTION_BIT: + if (unset) + *(int *)opt->value &= ~opt->defval; + else + *(int *)opt->value |= opt->defval; + return 0; + + case OPTION_BOOLEAN: + *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; + return 0; + + case OPTION_SET_INT: + *(int *)opt->value = unset ? 0 : opt->defval; + return 0; + + case OPTION_SET_PTR: + *(void **)opt->value = unset ? NULL : (void *)opt->defval; + return 0; + + case OPTION_STRING: + if (unset) + *(const char **)opt->value = NULL; + else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) + *(const char **)opt->value = (const char *)opt->defval; + else + return get_arg(p, opt, flags, (const char **)opt->value); + return 0; + + case OPTION_CALLBACK: + if (unset) + return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; + if (opt->flags & PARSE_OPT_NOARG) + return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) + return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; + if (get_arg(p, opt, flags, &arg)) + return -1; + return (*opt->callback)(opt, arg, 0) ? (-1) : 0; + + case OPTION_INTEGER: + if (unset) { + *(int *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(int *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + *(int *)opt->value = strtol(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + + default: + die("should not happen, someone must be hit on the forehead"); + } +} + +static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) +{ + for (; options->type != OPTION_END; options++) { + if (options->short_name == *p->opt) { + p->opt = p->opt[1] ? p->opt + 1 : NULL; + return get_value(p, options, OPT_SHORT); + } + } + return -2; +} + +static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, + const struct option *options) +{ + const char *arg_end = strchr(arg, '='); + const struct option *abbrev_option = NULL, *ambiguous_option = NULL; + int abbrev_flags = 0, ambiguous_flags = 0; + + if (!arg_end) + arg_end = arg + strlen(arg); + + for (; options->type != OPTION_END; options++) { + const char *rest; + int flags = 0; + + if (!options->long_name) + continue; + + rest = skip_prefix(arg, options->long_name); + if (options->type == OPTION_ARGUMENT) { + if (!rest) + continue; + if (*rest == '=') + return opterror(options, "takes no value", flags); + if (*rest) + continue; + p->out[p->cpidx++] = arg - 2; + return 0; + } + if (!rest) { + /* abbreviated? */ + if (!strncmp(options->long_name, arg, arg_end - arg)) { +is_abbreviated: + if (abbrev_option) { + /* + * If this is abbreviated, it is + * ambiguous. So when there is no + * exact match later, we need to + * error out. + */ + ambiguous_option = abbrev_option; + ambiguous_flags = abbrev_flags; + } + if (!(flags & OPT_UNSET) && *arg_end) + p->opt = arg_end + 1; + abbrev_option = options; + abbrev_flags = flags; + continue; + } + /* negated and abbreviated very much? */ + if (!prefixcmp("no-", arg)) { + flags |= OPT_UNSET; + goto is_abbreviated; + } + /* negated? */ + if (strncmp(arg, "no-", 3)) + continue; + flags |= OPT_UNSET; + rest = skip_prefix(arg + 3, options->long_name); + /* abbreviated and negated? */ + if (!rest && !prefixcmp(options->long_name, arg + 3)) + goto is_abbreviated; + if (!rest) + continue; + } + if (*rest) { + if (*rest != '=') + continue; + p->opt = rest + 1; + } + return get_value(p, options, flags); + } + + if (ambiguous_option) + return error("Ambiguous option: %s " + "(could be --%s%s or --%s%s)", + arg, + (ambiguous_flags & OPT_UNSET) ? "no-" : "", + ambiguous_option->long_name, + (abbrev_flags & OPT_UNSET) ? "no-" : "", + abbrev_option->long_name); + if (abbrev_option) + return get_value(p, abbrev_option, abbrev_flags); + return -2; +} + +static void check_typos(const char *arg, const struct option *options) +{ + if (strlen(arg) < 3) + return; + + if (!prefixcmp(arg, "no-")) { + error ("did you mean `--%s` (with two dashes ?)", arg); + exit(129); + } + + for (; options->type != OPTION_END; options++) { + if (!options->long_name) + continue; + if (!prefixcmp(options->long_name, arg)) { + error ("did you mean `--%s` (with two dashes ?)", arg); + exit(129); + } + } +} + +void parse_options_start(struct parse_opt_ctx_t *ctx, + int argc, const char **argv, int flags) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->argc = argc - 1; + ctx->argv = argv + 1; + ctx->out = argv; + ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); + ctx->flags = flags; + if ((flags & PARSE_OPT_KEEP_UNKNOWN) && + (flags & PARSE_OPT_STOP_AT_NON_OPTION)) + die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); +} + +static int usage_with_options_internal(const char * const *, + const struct option *, int); + +int parse_options_step(struct parse_opt_ctx_t *ctx, + const struct option *options, + const char * const usagestr[]) +{ + int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); + + /* we must reset ->opt, unknown short option leave it dangling */ + ctx->opt = NULL; + + for (; ctx->argc; ctx->argc--, ctx->argv++) { + const char *arg = ctx->argv[0]; + + if (*arg != '-' || !arg[1]) { + if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) + break; + ctx->out[ctx->cpidx++] = ctx->argv[0]; + continue; + } + + if (arg[1] != '-') { + ctx->opt = arg + 1; + if (internal_help && *ctx->opt == 'h') + return parse_options_usage(usagestr, options); + switch (parse_short_opt(ctx, options)) { + case -1: + return parse_options_usage(usagestr, options); + case -2: + goto unknown; + } + if (ctx->opt) + check_typos(arg + 1, options); + while (ctx->opt) { + if (internal_help && *ctx->opt == 'h') + return parse_options_usage(usagestr, options); + switch (parse_short_opt(ctx, options)) { + case -1: + return parse_options_usage(usagestr, options); + case -2: + /* fake a short option thing to hide the fact that we may have + * started to parse aggregated stuff + * + * This is leaky, too bad. + */ + ctx->argv[0] = strdup(ctx->opt - 1); + *(char *)ctx->argv[0] = '-'; + goto unknown; + } + } + continue; + } + + if (!arg[2]) { /* "--" */ + if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { + ctx->argc--; + ctx->argv++; + } + break; + } + + if (internal_help && !strcmp(arg + 2, "help-all")) + return usage_with_options_internal(usagestr, options, 1); + if (internal_help && !strcmp(arg + 2, "help")) + return parse_options_usage(usagestr, options); + switch (parse_long_opt(ctx, arg + 2, options)) { + case -1: + return parse_options_usage(usagestr, options); + case -2: + goto unknown; + } + continue; +unknown: + if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) + return PARSE_OPT_UNKNOWN; + ctx->out[ctx->cpidx++] = ctx->argv[0]; + ctx->opt = NULL; + } + return PARSE_OPT_DONE; +} + +int parse_options_end(struct parse_opt_ctx_t *ctx) +{ + memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); + ctx->out[ctx->cpidx + ctx->argc] = NULL; + return ctx->cpidx + ctx->argc; +} + +int parse_options(int argc, const char **argv, const struct option *options, + const char * const usagestr[], int flags) +{ + struct parse_opt_ctx_t ctx; + + parse_options_start(&ctx, argc, argv, flags); + switch (parse_options_step(&ctx, options, usagestr)) { + case PARSE_OPT_HELP: + exit(129); + case PARSE_OPT_DONE: + break; + default: /* PARSE_OPT_UNKNOWN */ + if (ctx.argv[0][1] == '-') { + error("unknown option `%s'", ctx.argv[0] + 2); + } else { + error("unknown switch `%c'", *ctx.opt); + } + usage_with_options(usagestr, options); + } + + return parse_options_end(&ctx); +} + +#define USAGE_OPTS_WIDTH 24 +#define USAGE_GAP 2 + +int usage_with_options_internal(const char * const *usagestr, + const struct option *opts, int full) +{ + if (!usagestr) + return PARSE_OPT_HELP; + + fprintf(stderr, "usage: %s\n", *usagestr++); + while (*usagestr && **usagestr) + fprintf(stderr, " or: %s\n", *usagestr++); + while (*usagestr) { + fprintf(stderr, "%s%s\n", + **usagestr ? " " : "", + *usagestr); + usagestr++; + } + + if (opts->type != OPTION_GROUP) + fputc('\n', stderr); + + for (; opts->type != OPTION_END; opts++) { + size_t pos; + int pad; + + if (opts->type == OPTION_GROUP) { + fputc('\n', stderr); + if (*opts->help) + fprintf(stderr, "%s\n", opts->help); + continue; + } + if (!full && (opts->flags & PARSE_OPT_HIDDEN)) + continue; + + pos = fprintf(stderr, " "); + if (opts->short_name) + pos += fprintf(stderr, "-%c", opts->short_name); + if (opts->long_name && opts->short_name) + pos += fprintf(stderr, ", "); + if (opts->long_name) + pos += fprintf(stderr, "--%s", opts->long_name); + + switch (opts->type) { + case OPTION_ARGUMENT: + break; + case OPTION_INTEGER: + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=]"); + else + pos += fprintf(stderr, "[]"); + else + pos += fprintf(stderr, " "); + break; + case OPTION_CALLBACK: + if (opts->flags & PARSE_OPT_NOARG) + break; + /* FALLTHROUGH */ + case OPTION_STRING: + if (opts->argh) { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=<%s>]", opts->argh); + else + pos += fprintf(stderr, "[<%s>]", opts->argh); + else + pos += fprintf(stderr, " <%s>", opts->argh); + } else { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=...]"); + else + pos += fprintf(stderr, "[...]"); + else + pos += fprintf(stderr, " ..."); + } + break; + default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ + break; + } + + if (pos <= USAGE_OPTS_WIDTH) + pad = USAGE_OPTS_WIDTH - pos; + else { + fputc('\n', stderr); + pad = USAGE_OPTS_WIDTH; + } + fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); + } + fputc('\n', stderr); + + return PARSE_OPT_HELP; +} + +void usage_with_options(const char * const *usagestr, + const struct option *opts) +{ + usage_with_options_internal(usagestr, opts, 0); + exit(129); +} + +int parse_options_usage(const char * const *usagestr, + const struct option *opts) +{ + return usage_with_options_internal(usagestr, opts, 0); +} + + +/*----- some often used options -----*/ +#include "cache.h" + +int parse_opt_verbosity_cb(const struct option *opt, const char *arg, + int unset) +{ + int *target = opt->value; + + if (unset) + /* --no-quiet, --no-verbose */ + *target = 0; + else if (opt->short_name == 'v') { + if (*target >= 0) + (*target)++; + else + *target = 1; + } else { + if (*target <= 0) + (*target)--; + else + *target = -1; + } + return 0; +} diff --git a/Documentation/perf_counter/parse-options.h b/Documentation/perf_counter/parse-options.h new file mode 100644 index 0000000..a81c7fa --- /dev/null +++ b/Documentation/perf_counter/parse-options.h @@ -0,0 +1,172 @@ +#ifndef PARSE_OPTIONS_H +#define PARSE_OPTIONS_H + +enum parse_opt_type { + /* special types */ + OPTION_END, + OPTION_ARGUMENT, + OPTION_GROUP, + /* options with no arguments */ + OPTION_BIT, + OPTION_BOOLEAN, /* _INCR would have been a better name */ + OPTION_SET_INT, + OPTION_SET_PTR, + /* options with arguments (usually) */ + OPTION_STRING, + OPTION_INTEGER, + OPTION_CALLBACK, +}; + +enum parse_opt_flags { + PARSE_OPT_KEEP_DASHDASH = 1, + PARSE_OPT_STOP_AT_NON_OPTION = 2, + PARSE_OPT_KEEP_ARGV0 = 4, + PARSE_OPT_KEEP_UNKNOWN = 8, + PARSE_OPT_NO_INTERNAL_HELP = 16, +}; + +enum parse_opt_option_flags { + PARSE_OPT_OPTARG = 1, + PARSE_OPT_NOARG = 2, + PARSE_OPT_NONEG = 4, + PARSE_OPT_HIDDEN = 8, + PARSE_OPT_LASTARG_DEFAULT = 16, +}; + +struct option; +typedef int parse_opt_cb(const struct option *, const char *arg, int unset); + +/* + * `type`:: + * holds the type of the option, you must have an OPTION_END last in your + * array. + * + * `short_name`:: + * the character to use as a short option name, '\0' if none. + * + * `long_name`:: + * the long option name, without the leading dashes, NULL if none. + * + * `value`:: + * stores pointers to the values to be filled. + * + * `argh`:: + * token to explain the kind of argument this option wants. Keep it + * homogenous across the repository. + * + * `help`:: + * the short help associated to what the option does. + * Must never be NULL (except for OPTION_END). + * OPTION_GROUP uses this pointer to store the group header. + * + * `flags`:: + * mask of parse_opt_option_flags. + * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) + * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs + * PARSE_OPT_NONEG: says that this option cannot be negated + * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in + * the long one. + * + * `callback`:: + * pointer to the callback to use for OPTION_CALLBACK. + * + * `defval`:: + * default value to fill (*->value) with for PARSE_OPT_OPTARG. + * OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in + * the value when met. + * CALLBACKS can use it like they want. + */ +struct option { + enum parse_opt_type type; + int short_name; + const char *long_name; + void *value; + const char *argh; + const char *help; + + int flags; + parse_opt_cb *callback; + intptr_t defval; +}; + +#define OPT_END() { OPTION_END } +#define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } +#define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } +#define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } +#define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } +#define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } +#define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } +#define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } +#define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } +#define OPT_DATE(s, l, v, h) \ + { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ + parse_opt_approxidate_cb } +#define OPT_CALLBACK(s, l, v, a, h, f) \ + { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } + +/* parse_options() will filter out the processed options and leave the + * non-option argments in argv[]. + * Returns the number of arguments left in argv[]. + */ +extern int parse_options(int argc, const char **argv, + const struct option *options, + const char * const usagestr[], int flags); + +extern NORETURN void usage_with_options(const char * const *usagestr, + const struct option *options); + +/*----- incremantal advanced APIs -----*/ + +enum { + PARSE_OPT_HELP = -1, + PARSE_OPT_DONE, + PARSE_OPT_UNKNOWN, +}; + +/* + * It's okay for the caller to consume argv/argc in the usual way. + * Other fields of that structure are private to parse-options and should not + * be modified in any way. + */ +struct parse_opt_ctx_t { + const char **argv; + const char **out; + int argc, cpidx; + const char *opt; + int flags; +}; + +extern int parse_options_usage(const char * const *usagestr, + const struct option *opts); + +extern void parse_options_start(struct parse_opt_ctx_t *ctx, + int argc, const char **argv, int flags); + +extern int parse_options_step(struct parse_opt_ctx_t *ctx, + const struct option *options, + const char * const usagestr[]); + +extern int parse_options_end(struct parse_opt_ctx_t *ctx); + + +/*----- some often used options -----*/ +extern int parse_opt_abbrev_cb(const struct option *, const char *, int); +extern int parse_opt_approxidate_cb(const struct option *, const char *, int); +extern int parse_opt_verbosity_cb(const struct option *, const char *, int); + +#define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") +#define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") +#define OPT__VERBOSITY(var) \ + { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ + PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ + { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ + PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } +#define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") +#define OPT__ABBREV(var) \ + { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ + "use digits to display SHA-1s", \ + PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } + +extern const char *parse_options_fix_filename(const char *prefix, const char *file); + +#endif diff --git a/Documentation/perf_counter/path.c b/Documentation/perf_counter/path.c new file mode 100644 index 0000000..891b612 --- /dev/null +++ b/Documentation/perf_counter/path.c @@ -0,0 +1,392 @@ +/* + * I'm tired of doing "vsnprintf()" etc just to open a + * file, so here's a "return static buffer with printf" + * interface for paths. + * + * It's obviously not thread-safe. Sue me. But it's quite + * useful for doing things like + * + * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); + * + * which is what it's designed for. + */ +#include "cache.h" + +static char bad_path[] = "/bad-path/"; +/* + * Two hacks: + */ + +static char *get_perf_dir(void) +{ + return "."; +} + +size_t strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + + +static char *get_pathname(void) +{ + static char pathname_array[4][PATH_MAX]; + static int index; + return pathname_array[3 & ++index]; +} + +static char *cleanup_path(char *path) +{ + /* Clean it up */ + if (!memcmp(path, "./", 2)) { + path += 2; + while (*path == '/') + path++; + } + return path; +} + +char *mksnpath(char *buf, size_t n, const char *fmt, ...) +{ + va_list args; + unsigned len; + + va_start(args, fmt); + len = vsnprintf(buf, n, fmt, args); + va_end(args); + if (len >= n) { + strlcpy(buf, bad_path, n); + return buf; + } + return cleanup_path(buf); +} + +static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) +{ + const char *perf_dir = get_perf_dir(); + size_t len; + + len = strlen(perf_dir); + if (n < len + 1) + goto bad; + memcpy(buf, perf_dir, len); + if (len && !is_dir_sep(perf_dir[len-1])) + buf[len++] = '/'; + len += vsnprintf(buf + len, n - len, fmt, args); + if (len >= n) + goto bad; + return cleanup_path(buf); +bad: + strlcpy(buf, bad_path, n); + return buf; +} + +char *perf_snpath(char *buf, size_t n, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + (void)perf_vsnpath(buf, n, fmt, args); + va_end(args); + return buf; +} + +char *perf_pathdup(const char *fmt, ...) +{ + char path[PATH_MAX]; + va_list args; + va_start(args, fmt); + (void)perf_vsnpath(path, sizeof(path), fmt, args); + va_end(args); + return xstrdup(path); +} + +char *mkpath(const char *fmt, ...) +{ + va_list args; + unsigned len; + char *pathname = get_pathname(); + + va_start(args, fmt); + len = vsnprintf(pathname, PATH_MAX, fmt, args); + va_end(args); + if (len >= PATH_MAX) + return bad_path; + return cleanup_path(pathname); +} + +char *perf_path(const char *fmt, ...) +{ + const char *perf_dir = get_perf_dir(); + char *pathname = get_pathname(); + va_list args; + unsigned len; + + len = strlen(perf_dir); + if (len > PATH_MAX-100) + return bad_path; + memcpy(pathname, perf_dir, len); + if (len && perf_dir[len-1] != '/') + pathname[len++] = '/'; + va_start(args, fmt); + len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); + va_end(args); + if (len >= PATH_MAX) + return bad_path; + return cleanup_path(pathname); +} + + +/* perf_mkstemp() - create tmp file honoring TMPDIR variable */ +int perf_mkstemp(char *path, size_t len, const char *template) +{ + const char *tmp; + size_t n; + + tmp = getenv("TMPDIR"); + if (!tmp) + tmp = "/tmp"; + n = snprintf(path, len, "%s/%s", tmp, template); + if (len <= n) { + errno = ENAMETOOLONG; + return -1; + } + return mkstemp(path); +} + + +static char *user_path(char *buf, char *path, int sz) +{ + struct passwd *pw; + char *slash; + int len, baselen; + + if (!path || path[0] != '~') + return NULL; + path++; + slash = strchr(path, '/'); + if (path[0] == '/' || !path[0]) { + pw = getpwuid(getuid()); + } + else { + if (slash) { + *slash = 0; + pw = getpwnam(path); + *slash = '/'; + } + else + pw = getpwnam(path); + } + if (!pw || !pw->pw_dir || sz <= strlen(pw->pw_dir)) + return NULL; + baselen = strlen(pw->pw_dir); + memcpy(buf, pw->pw_dir, baselen); + while ((1 < baselen) && (buf[baselen-1] == '/')) { + buf[baselen-1] = 0; + baselen--; + } + if (slash && slash[1]) { + len = strlen(slash); + if (sz <= baselen + len) + return NULL; + memcpy(buf + baselen, slash, len + 1); + } + return buf; +} + +const char *make_relative_path(const char *abs, const char *base) +{ + static char buf[PATH_MAX + 1]; + int baselen; + if (!base) + return abs; + baselen = strlen(base); + if (prefixcmp(abs, base)) + return abs; + if (abs[baselen] == '/') + baselen++; + else if (base[baselen - 1] != '/') + return abs; + strcpy(buf, abs + baselen); + return buf; +} + +/* + * It is okay if dst == src, but they should not overlap otherwise. + * + * Performs the following normalizations on src, storing the result in dst: + * - Ensures that components are separated by '/' (Windows only) + * - Squashes sequences of '/'. + * - Removes "." components. + * - Removes ".." components, and the components the precede them. + * Returns failure (non-zero) if a ".." component appears as first path + * component anytime during the normalization. Otherwise, returns success (0). + * + * Note that this function is purely textual. It does not follow symlinks, + * verify the existence of the path, or make any system calls. + */ +int normalize_path_copy(char *dst, const char *src) +{ + char *dst0; + + if (has_dos_drive_prefix(src)) { + *dst++ = *src++; + *dst++ = *src++; + } + dst0 = dst; + + if (is_dir_sep(*src)) { + *dst++ = '/'; + while (is_dir_sep(*src)) + src++; + } + + for (;;) { + char c = *src; + + /* + * A path component that begins with . could be + * special: + * (1) "." and ends -- ignore and terminate. + * (2) "./" -- ignore them, eat slash and continue. + * (3) ".." and ends -- strip one and terminate. + * (4) "../" -- strip one, eat slash and continue. + */ + if (c == '.') { + if (!src[1]) { + /* (1) */ + src++; + } else if (is_dir_sep(src[1])) { + /* (2) */ + src += 2; + while (is_dir_sep(*src)) + src++; + continue; + } else if (src[1] == '.') { + if (!src[2]) { + /* (3) */ + src += 2; + goto up_one; + } else if (is_dir_sep(src[2])) { + /* (4) */ + src += 3; + while (is_dir_sep(*src)) + src++; + goto up_one; + } + } + } + + /* copy up to the next '/', and eat all '/' */ + while ((c = *src++) != '\0' && !is_dir_sep(c)) + *dst++ = c; + if (is_dir_sep(c)) { + *dst++ = '/'; + while (is_dir_sep(c)) + c = *src++; + src--; + } else if (!c) + break; + continue; + + up_one: + /* + * dst0..dst is prefix portion, and dst[-1] is '/'; + * go up one level. + */ + dst--; /* go to trailing '/' */ + if (dst <= dst0) + return -1; + /* Windows: dst[-1] cannot be backslash anymore */ + while (dst0 < dst && dst[-1] != '/') + dst--; + } + *dst = '\0'; + return 0; +} + +/* + * path = Canonical absolute path + * prefix_list = Colon-separated list of absolute paths + * + * Determines, for each path in prefix_list, whether the "prefix" really + * is an ancestor directory of path. Returns the length of the longest + * ancestor directory, excluding any trailing slashes, or -1 if no prefix + * is an ancestor. (Note that this means 0 is returned if prefix_list is + * "/".) "/foo" is not considered an ancestor of "/foobar". Directories + * are not considered to be their own ancestors. path must be in a + * canonical form: empty components, or "." or ".." components are not + * allowed. prefix_list may be null, which is like "". + */ +int longest_ancestor_length(const char *path, const char *prefix_list) +{ + char buf[PATH_MAX+1]; + const char *ceil, *colon; + int len, max_len = -1; + + if (prefix_list == NULL || !strcmp(path, "/")) + return -1; + + for (colon = ceil = prefix_list; *colon; ceil = colon+1) { + for (colon = ceil; *colon && *colon != PATH_SEP; colon++); + len = colon - ceil; + if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil)) + continue; + strlcpy(buf, ceil, len+1); + if (normalize_path_copy(buf, buf) < 0) + continue; + len = strlen(buf); + if (len > 0 && buf[len-1] == '/') + buf[--len] = '\0'; + + if (!strncmp(path, buf, len) && + path[len] == '/' && + len > max_len) { + max_len = len; + } + } + + return max_len; +} + +/* strip arbitrary amount of directory separators at end of path */ +static inline int chomp_trailing_dir_sep(const char *path, int len) +{ + while (len && is_dir_sep(path[len - 1])) + len--; + return len; +} + +/* + * If path ends with suffix (complete path components), returns the + * part before suffix (sans trailing directory separators). + * Otherwise returns NULL. + */ +char *strip_path_suffix(const char *path, const char *suffix) +{ + int path_len = strlen(path), suffix_len = strlen(suffix); + + while (suffix_len) { + if (!path_len) + return NULL; + + if (is_dir_sep(path[path_len - 1])) { + if (!is_dir_sep(suffix[suffix_len - 1])) + return NULL; + path_len = chomp_trailing_dir_sep(path, path_len); + suffix_len = chomp_trailing_dir_sep(suffix, suffix_len); + } + else if (path[--path_len] != suffix[--suffix_len]) + return NULL; + } + + if (path_len && !is_dir_sep(path[path_len - 1])) + return NULL; + return xstrndup(path, chomp_trailing_dir_sep(path, path_len)); +} diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c new file mode 100644 index 0000000..9256f6a --- /dev/null +++ b/Documentation/perf_counter/perf.c @@ -0,0 +1,411 @@ +#include "builtin.h" +#include "exec_cmd.h" +#include "cache.h" +//#include "quote.h" +#include "run-command.h" + +const char perf_usage_string[] = + "perf [--version] [--exec-path[=PERF_EXEC_PATH]] [--html-path] [-p|--paginate|--no-pager] [--bare] [--perf-dir=PERF_DIR] [--work-tree=PERF_WORK_TREE] [--help] COMMAND [ARGS]"; + +const char perf_more_info_string[] = + "See 'perf help COMMAND' for more information on a specific command."; + +static int use_pager = -1; +struct pager_config { + const char *cmd; + int val; +}; + +static int pager_command_config(const char *var, const char *value, void *data) +{ + struct pager_config *c = data; + if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd)) + c->val = perf_config_bool(var, value); + return 0; +} + +/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ +int check_pager_config(const char *cmd) +{ + struct pager_config c; + c.cmd = cmd; + c.val = -1; + perf_config(pager_command_config, &c); + return c.val; +} + +static void commit_pager_choice(void) { + switch (use_pager) { + case 0: + setenv("PERF_PAGER", "cat", 1); + break; + case 1: + /* setup_pager(); */ + break; + default: + break; + } +} + +static int handle_options(const char*** argv, int* argc, int* envchanged) +{ + int handled = 0; + + while (*argc > 0) { + const char *cmd = (*argv)[0]; + if (cmd[0] != '-') + break; + + /* + * For legacy reasons, the "version" and "help" + * commands can be written with "--" prepended + * to make them look like flags. + */ + if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version")) + break; + + /* + * Check remaining flags. + */ + if (!prefixcmp(cmd, "--exec-path")) { + cmd += 11; + if (*cmd == '=') + perf_set_argv_exec_path(cmd + 1); + else { + puts(perf_exec_path()); + exit(0); + } + } else if (!strcmp(cmd, "--html-path")) { + puts(system_path(PERF_HTML_PATH)); + exit(0); + } else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) { + use_pager = 1; + } else if (!strcmp(cmd, "--no-pager")) { + use_pager = 0; + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--perf-dir")) { + if (*argc < 2) { + fprintf(stderr, "No directory given for --perf-dir.\n" ); + usage(perf_usage_string); + } + setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; + handled++; + } else if (!prefixcmp(cmd, "--perf-dir=")) { + setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--work-tree")) { + if (*argc < 2) { + fprintf(stderr, "No directory given for --work-tree.\n" ); + usage(perf_usage_string); + } + setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; + } else if (!prefixcmp(cmd, "--work-tree=")) { + setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); + if (envchanged) + *envchanged = 1; + } else { + fprintf(stderr, "Unknown option: %s\n", cmd); + usage(perf_usage_string); + } + + (*argv)++; + (*argc)--; + handled++; + } + return handled; +} + +static int handle_alias(int *argcp, const char ***argv) +{ + int envchanged = 0, ret = 0, saved_errno = errno; + int count, option_count; + const char** new_argv; + const char *alias_command; + char *alias_string; + int unused_nonperf; + + alias_command = (*argv)[0]; + alias_string = alias_lookup(alias_command); + if (alias_string) { + if (alias_string[0] == '!') { + if (*argcp > 1) { + struct strbuf buf; + + strbuf_init(&buf, PATH_MAX); + strbuf_addstr(&buf, alias_string); + sq_quote_argv(&buf, (*argv) + 1, PATH_MAX); + free(alias_string); + alias_string = buf.buf; + } + ret = system(alias_string + 1); + if (ret >= 0 && WIFEXITED(ret) && + WEXITSTATUS(ret) != 127) + exit(WEXITSTATUS(ret)); + die("Failed to run '%s' when expanding alias '%s'", + alias_string + 1, alias_command); + } + count = split_cmdline(alias_string, &new_argv); + if (count < 0) + die("Bad alias.%s string", alias_command); + option_count = handle_options(&new_argv, &count, &envchanged); + if (envchanged) + die("alias '%s' changes environment variables\n" + "You can use '!perf' in the alias to do this.", + alias_command); + memmove(new_argv - option_count, new_argv, + count * sizeof(char *)); + new_argv -= option_count; + + if (count < 1) + die("empty alias for %s", alias_command); + + if (!strcmp(alias_command, new_argv[0])) + die("recursive alias: %s", alias_command); + + new_argv = realloc(new_argv, sizeof(char*) * + (count + *argcp + 1)); + /* insert after command name */ + memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp); + new_argv[count+*argcp] = NULL; + + *argv = new_argv; + *argcp += count - 1; + + ret = 1; + } + + errno = saved_errno; + + return ret; +} + +const char perf_version_string[] = PERF_VERSION; + +#define RUN_SETUP (1<<0) +#define USE_PAGER (1<<1) +/* + * require working tree to be present -- anything uses this needs + * RUN_SETUP for reading from the configuration file. + */ +#define NEED_WORK_TREE (1<<2) + +struct cmd_struct { + const char *cmd; + int (*fn)(int, const char **, const char *); + int option; +}; + +static int run_builtin(struct cmd_struct *p, int argc, const char **argv) +{ + int status; + struct stat st; + const char *prefix; + + prefix = NULL; + if (p->option & RUN_SETUP) + prefix = NULL; /* setup_perf_directory(); */ + + if (use_pager == -1 && p->option & RUN_SETUP) + use_pager = check_pager_config(p->cmd); + if (use_pager == -1 && p->option & USE_PAGER) + use_pager = 1; + commit_pager_choice(); + + if (p->option & NEED_WORK_TREE) + /* setup_work_tree() */; + + status = p->fn(argc, argv, prefix); + if (status) + return status & 0xff; + + /* Somebody closed stdout? */ + if (fstat(fileno(stdout), &st)) + return 0; + /* Ignore write errors for pipes and sockets.. */ + if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) + return 0; + + /* Check for ENOSPC and EIO errors.. */ + if (fflush(stdout)) + die("write failure on standard output: %s", strerror(errno)); + if (ferror(stdout)) + die("unknown write failure on standard output"); + if (fclose(stdout)) + die("close failed on standard output: %s", strerror(errno)); + return 0; +} + +static void handle_internal_command(int argc, const char **argv) +{ + const char *cmd = argv[0]; + static struct cmd_struct commands[] = { + { "top", cmd_top, 0 }, + }; + int i; + static const char ext[] = STRIP_EXTENSION; + + if (sizeof(ext) > 1) { + i = strlen(argv[0]) - strlen(ext); + if (i > 0 && !strcmp(argv[0] + i, ext)) { + char *argv0 = strdup(argv[0]); + argv[0] = cmd = argv0; + argv0[i] = '\0'; + } + } + + /* Turn "perf cmd --help" into "perf help cmd" */ + if (argc > 1 && !strcmp(argv[1], "--help")) { + argv[1] = argv[0]; + argv[0] = cmd = "help"; + } + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + struct cmd_struct *p = commands+i; + if (strcmp(p->cmd, cmd)) + continue; + exit(run_builtin(p, argc, argv)); + } +} + +static void execv_dashed_external(const char **argv) +{ + struct strbuf cmd = STRBUF_INIT; + const char *tmp; + int status; + + strbuf_addf(&cmd, "perf-%s", argv[0]); + + /* + * argv[0] must be the perf command, but the argv array + * belongs to the caller, and may be reused in + * subsequent loop iterations. Save argv[0] and + * restore it on error. + */ + tmp = argv[0]; + argv[0] = cmd.buf; + + /* + * if we fail because the command is not found, it is + * OK to return. Otherwise, we just pass along the status code. + */ + status = run_command_v_opt(argv, 0); + if (status != -ERR_RUN_COMMAND_EXEC) { + if (IS_RUN_COMMAND_ERR(status)) + die("unable to run '%s'", argv[0]); + exit(-status); + } + errno = ENOENT; /* as if we called execvp */ + + argv[0] = tmp; + + strbuf_release(&cmd); +} + +static int run_argv(int *argcp, const char ***argv) +{ + int done_alias = 0; + + while (1) { + /* See if it's an internal command */ + handle_internal_command(*argcp, *argv); + + /* .. then try the external ones */ + execv_dashed_external(*argv); + + /* It could be an alias -- this works around the insanity + * of overriding "perf log" with "perf show" by having + * alias.log = show + */ + if (done_alias || !handle_alias(argcp, argv)) + break; + done_alias = 1; + } + + return done_alias; +} + + +int main(int argc, const char **argv) +{ + const char *cmd; + + cmd = perf_extract_argv0_path(argv[0]); + if (!cmd) + cmd = "perf-help"; + + /* + * "perf-xxxx" is the same as "perf xxxx", but we obviously: + * + * - cannot take flags in between the "perf" and the "xxxx". + * - cannot execute it externally (since it would just do + * the same thing over again) + * + * So we just directly call the internal command handler, and + * die if that one cannot handle it. + */ + if (!prefixcmp(cmd, "perf-")) { + cmd += 4; + argv[0] = cmd; + handle_internal_command(argc, argv); + die("cannot handle %s internally", cmd); + } + + /* Look for flags.. */ + argv++; + argc--; + handle_options(&argv, &argc, NULL); + commit_pager_choice(); + if (argc > 0) { + if (!prefixcmp(argv[0], "--")) + argv[0] += 2; + } else { + /* The user didn't specify a command; give them help */ + printf("usage: %s\n\n", perf_usage_string); + list_common_cmds_help(); + printf("\n%s\n", perf_more_info_string); + exit(1); + } + cmd = argv[0]; + + /* + * We use PATH to find perf commands, but we prepend some higher + * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH + * environment, and the $(perfexecdir) from the Makefile at build + * time. + */ + setup_path(); + + while (1) { + static int done_help = 0; + static int was_alias = 0; + was_alias = run_argv(&argc, &argv); + if (errno != ENOENT) + break; + if (was_alias) { + fprintf(stderr, "Expansion of alias '%s' failed; " + "'%s' is not a perf-command\n", + cmd, argv[0]); + exit(1); + } + if (!done_help) { + cmd = argv[0] = help_unknown_cmd(cmd); + done_help = 1; + } else + break; + } + + fprintf(stderr, "Failed to run command '%s': %s\n", + cmd, strerror(errno)); + + return 1; +} diff --git a/Documentation/perf_counter/quote.c b/Documentation/perf_counter/quote.c new file mode 100644 index 0000000..7a49fcf --- /dev/null +++ b/Documentation/perf_counter/quote.c @@ -0,0 +1,478 @@ +#include "cache.h" +#include "quote.h" + +int quote_path_fully = 1; + +/* Help to copy the thing properly quoted for the shell safety. + * any single quote is replaced with '\'', any exclamation point + * is replaced with '\!', and the whole thing is enclosed in a + * + * E.g. + * original sq_quote result + * name ==> name ==> 'name' + * a b ==> a b ==> 'a b' + * a'b ==> a'\''b ==> 'a'\''b' + * a!b ==> a'\!'b ==> 'a'\!'b' + */ +static inline int need_bs_quote(char c) +{ + return (c == '\'' || c == '!'); +} + +void sq_quote_buf(struct strbuf *dst, const char *src) +{ + char *to_free = NULL; + + if (dst->buf == src) + to_free = strbuf_detach(dst, NULL); + + strbuf_addch(dst, '\''); + while (*src) { + size_t len = strcspn(src, "'!"); + strbuf_add(dst, src, len); + src += len; + while (need_bs_quote(*src)) { + strbuf_addstr(dst, "'\\"); + strbuf_addch(dst, *src++); + strbuf_addch(dst, '\''); + } + } + strbuf_addch(dst, '\''); + free(to_free); +} + +void sq_quote_print(FILE *stream, const char *src) +{ + char c; + + fputc('\'', stream); + while ((c = *src++)) { + if (need_bs_quote(c)) { + fputs("'\\", stream); + fputc(c, stream); + fputc('\'', stream); + } else { + fputc(c, stream); + } + } + fputc('\'', stream); +} + +void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) +{ + int i; + + /* Copy into destination buffer. */ + strbuf_grow(dst, 255); + for (i = 0; argv[i]; ++i) { + strbuf_addch(dst, ' '); + sq_quote_buf(dst, argv[i]); + if (maxlen && dst->len > maxlen) + die("Too many or long arguments"); + } +} + +char *sq_dequote_step(char *arg, char **next) +{ + char *dst = arg; + char *src = arg; + char c; + + if (*src != '\'') + return NULL; + for (;;) { + c = *++src; + if (!c) + return NULL; + if (c != '\'') { + *dst++ = c; + continue; + } + /* We stepped out of sq */ + switch (*++src) { + case '\0': + *dst = 0; + if (next) + *next = NULL; + return arg; + case '\\': + c = *++src; + if (need_bs_quote(c) && *++src == '\'') { + *dst++ = c; + continue; + } + /* Fallthrough */ + default: + if (!next || !isspace(*src)) + return NULL; + do { + c = *++src; + } while (isspace(c)); + *dst = 0; + *next = src; + return arg; + } + } +} + +char *sq_dequote(char *arg) +{ + return sq_dequote_step(arg, NULL); +} + +int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) +{ + char *next = arg; + + if (!*arg) + return 0; + do { + char *dequoted = sq_dequote_step(next, &next); + if (!dequoted) + return -1; + ALLOC_GROW(*argv, *nr + 1, *alloc); + (*argv)[(*nr)++] = dequoted; + } while (next); + + return 0; +} + +/* 1 means: quote as octal + * 0 means: quote as octal if (quote_path_fully) + * -1 means: never quote + * c: quote as "\\c" + */ +#define X8(x) x, x, x, x, x, x, x, x +#define X16(x) X8(x), X8(x) +static signed char const sq_lookup[256] = { + /* 0 1 2 3 4 5 6 7 */ + /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 'a', + /* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r', 1, 1, + /* 0x10 */ X16(1), + /* 0x20 */ -1, -1, '"', -1, -1, -1, -1, -1, + /* 0x28 */ X16(-1), X16(-1), X16(-1), + /* 0x58 */ -1, -1, -1, -1,'\\', -1, -1, -1, + /* 0x60 */ X16(-1), X8(-1), + /* 0x78 */ -1, -1, -1, -1, -1, -1, -1, 1, + /* 0x80 */ /* set to 0 */ +}; + +static inline int sq_must_quote(char c) +{ + return sq_lookup[(unsigned char)c] + quote_path_fully > 0; +} + +/* returns the longest prefix not needing a quote up to maxlen if positive. + This stops at the first \0 because it's marked as a character needing an + escape */ +static size_t next_quote_pos(const char *s, ssize_t maxlen) +{ + size_t len; + if (maxlen < 0) { + for (len = 0; !sq_must_quote(s[len]); len++); + } else { + for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++); + } + return len; +} + +/* + * C-style name quoting. + * + * (1) if sb and fp are both NULL, inspect the input name and counts the + * number of bytes that are needed to hold c_style quoted version of name, + * counting the double quotes around it but not terminating NUL, and + * returns it. + * However, if name does not need c_style quoting, it returns 0. + * + * (2) if sb or fp are not NULL, it emits the c_style quoted version + * of name, enclosed with double quotes if asked and needed only. + * Return value is the same as in (1). + */ +static size_t quote_c_style_counted(const char *name, ssize_t maxlen, + struct strbuf *sb, FILE *fp, int no_dq) +{ +#undef EMIT +#define EMIT(c) \ + do { \ + if (sb) strbuf_addch(sb, (c)); \ + if (fp) fputc((c), fp); \ + count++; \ + } while (0) +#define EMITBUF(s, l) \ + do { \ + if (sb) strbuf_add(sb, (s), (l)); \ + if (fp) fwrite((s), (l), 1, fp); \ + count += (l); \ + } while (0) + + size_t len, count = 0; + const char *p = name; + + for (;;) { + int ch; + + len = next_quote_pos(p, maxlen); + if (len == maxlen || !p[len]) + break; + + if (!no_dq && p == name) + EMIT('"'); + + EMITBUF(p, len); + EMIT('\\'); + p += len; + ch = (unsigned char)*p++; + if (sq_lookup[ch] >= ' ') { + EMIT(sq_lookup[ch]); + } else { + EMIT(((ch >> 6) & 03) + '0'); + EMIT(((ch >> 3) & 07) + '0'); + EMIT(((ch >> 0) & 07) + '0'); + } + } + + EMITBUF(p, len); + if (p == name) /* no ending quote needed */ + return 0; + + if (!no_dq) + EMIT('"'); + return count; +} + +size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq) +{ + return quote_c_style_counted(name, -1, sb, fp, nodq); +} + +void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq) +{ + if (quote_c_style(prefix, NULL, NULL, 0) || + quote_c_style(path, NULL, NULL, 0)) { + if (!nodq) + strbuf_addch(sb, '"'); + quote_c_style(prefix, sb, NULL, 1); + quote_c_style(path, sb, NULL, 1); + if (!nodq) + strbuf_addch(sb, '"'); + } else { + strbuf_addstr(sb, prefix); + strbuf_addstr(sb, path); + } +} + +void write_name_quoted(const char *name, FILE *fp, int terminator) +{ + if (terminator) { + quote_c_style(name, NULL, fp, 0); + } else { + fputs(name, fp); + } + fputc(terminator, fp); +} + +extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, + const char *name, FILE *fp, int terminator) +{ + int needquote = 0; + + if (terminator) { + needquote = next_quote_pos(pfx, pfxlen) < pfxlen + || name[next_quote_pos(name, -1)]; + } + if (needquote) { + fputc('"', fp); + quote_c_style_counted(pfx, pfxlen, NULL, fp, 1); + quote_c_style(name, NULL, fp, 1); + fputc('"', fp); + } else { + fwrite(pfx, pfxlen, 1, fp); + fputs(name, fp); + } + fputc(terminator, fp); +} + +/* quote path as relative to the given prefix */ +char *quote_path_relative(const char *in, int len, + struct strbuf *out, const char *prefix) +{ + int needquote; + + if (len < 0) + len = strlen(in); + + /* "../" prefix itself does not need quoting, but "in" might. */ + needquote = next_quote_pos(in, len) < len; + strbuf_setlen(out, 0); + strbuf_grow(out, len); + + if (needquote) + strbuf_addch(out, '"'); + if (prefix) { + int off = 0; + while (prefix[off] && off < len && prefix[off] == in[off]) + if (prefix[off] == '/') { + prefix += off + 1; + in += off + 1; + len -= off + 1; + off = 0; + } else + off++; + + for (; *prefix; prefix++) + if (*prefix == '/') + strbuf_addstr(out, "../"); + } + + quote_c_style_counted (in, len, out, NULL, 1); + + if (needquote) + strbuf_addch(out, '"'); + if (!out->len) + strbuf_addstr(out, "./"); + + return out->buf; +} + +/* + * C-style name unquoting. + * + * Quoted should point at the opening double quote. + * + Returns 0 if it was able to unquote the string properly, and appends the + * result in the strbuf `sb'. + * + Returns -1 in case of error, and doesn't touch the strbuf. Though note + * that this function will allocate memory in the strbuf, so calling + * strbuf_release is mandatory whichever result unquote_c_style returns. + * + * Updates endp pointer to point at one past the ending double quote if given. + */ +int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) +{ + size_t oldlen = sb->len, len; + int ch, ac; + + if (*quoted++ != '"') + return -1; + + for (;;) { + len = strcspn(quoted, "\"\\"); + strbuf_add(sb, quoted, len); + quoted += len; + + switch (*quoted++) { + case '"': + if (endp) + *endp = quoted; + return 0; + case '\\': + break; + default: + goto error; + } + + switch ((ch = *quoted++)) { + case 'a': ch = '\a'; break; + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + case 'v': ch = '\v'; break; + + case '\\': case '"': + break; /* verbatim */ + + /* octal values with first digit over 4 overflow */ + case '0': case '1': case '2': case '3': + ac = ((ch - '0') << 6); + if ((ch = *quoted++) < '0' || '7' < ch) + goto error; + ac |= ((ch - '0') << 3); + if ((ch = *quoted++) < '0' || '7' < ch) + goto error; + ac |= (ch - '0'); + ch = ac; + break; + default: + goto error; + } + strbuf_addch(sb, ch); + } + + error: + strbuf_setlen(sb, oldlen); + return -1; +} + +/* quoting as a string literal for other languages */ + +void perl_quote_print(FILE *stream, const char *src) +{ + const char sq = '\''; + const char bq = '\\'; + char c; + + fputc(sq, stream); + while ((c = *src++)) { + if (c == sq || c == bq) + fputc(bq, stream); + fputc(c, stream); + } + fputc(sq, stream); +} + +void python_quote_print(FILE *stream, const char *src) +{ + const char sq = '\''; + const char bq = '\\'; + const char nl = '\n'; + char c; + + fputc(sq, stream); + while ((c = *src++)) { + if (c == nl) { + fputc(bq, stream); + fputc('n', stream); + continue; + } + if (c == sq || c == bq) + fputc(bq, stream); + fputc(c, stream); + } + fputc(sq, stream); +} + +void tcl_quote_print(FILE *stream, const char *src) +{ + char c; + + fputc('"', stream); + while ((c = *src++)) { + switch (c) { + case '[': case ']': + case '{': case '}': + case '$': case '\\': case '"': + fputc('\\', stream); + default: + fputc(c, stream); + break; + case '\f': + fputs("\\f", stream); + break; + case '\r': + fputs("\\r", stream); + break; + case '\n': + fputs("\\n", stream); + break; + case '\t': + fputs("\\t", stream); + break; + case '\v': + fputs("\\v", stream); + break; + } + } + fputc('"', stream); +} diff --git a/Documentation/perf_counter/quote.h b/Documentation/perf_counter/quote.h new file mode 100644 index 0000000..66730f2 --- /dev/null +++ b/Documentation/perf_counter/quote.h @@ -0,0 +1,68 @@ +#ifndef QUOTE_H +#define QUOTE_H + +#include +#include + +/* Help to copy the thing properly quoted for the shell safety. + * any single quote is replaced with '\'', any exclamation point + * is replaced with '\!', and the whole thing is enclosed in a + * single quote pair. + * + * For example, if you are passing the result to system() as an + * argument: + * + * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) + * + * would be appropriate. If the system() is going to call ssh to + * run the command on the other side: + * + * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); + * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd)); + * + * Note that the above examples leak memory! Remember to free result from + * sq_quote() in a real application. + * + * sq_quote_buf() writes to an existing buffer of specified size; it + * will return the number of characters that would have been written + * excluding the final null regardless of the buffer size. + */ + +extern void sq_quote_print(FILE *stream, const char *src); + +extern void sq_quote_buf(struct strbuf *, const char *src); +extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); + +/* This unwraps what sq_quote() produces in place, but returns + * NULL if the input does not look like what sq_quote would have + * produced. + */ +extern char *sq_dequote(char *); + +/* + * Same as the above, but can be used to unwrap many arguments in the + * same string separated by space. "next" is changed to point to the + * next argument that should be passed as first parameter. When there + * is no more argument to be dequoted, "next" is updated to point to NULL. + */ +extern char *sq_dequote_step(char *arg, char **next); +extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); + +extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); +extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq); +extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); + +extern void write_name_quoted(const char *name, FILE *, int terminator); +extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, + const char *name, FILE *, int terminator); + +/* quote path as relative to the given prefix */ +char *quote_path_relative(const char *in, int len, + struct strbuf *out, const char *prefix); + +/* quoting as a string literal for other languages */ +extern void perl_quote_print(FILE *stream, const char *src); +extern void python_quote_print(FILE *stream, const char *src); +extern void tcl_quote_print(FILE *stream, const char *src); + +#endif diff --git a/Documentation/perf_counter/run-command.c b/Documentation/perf_counter/run-command.c new file mode 100644 index 0000000..b2f5e85 --- /dev/null +++ b/Documentation/perf_counter/run-command.c @@ -0,0 +1,395 @@ +#include "cache.h" +#include "run-command.h" +#include "exec_cmd.h" + +static inline void close_pair(int fd[2]) +{ + close(fd[0]); + close(fd[1]); +} + +static inline void dup_devnull(int to) +{ + int fd = open("/dev/null", O_RDWR); + dup2(fd, to); + close(fd); +} + +int start_command(struct child_process *cmd) +{ + int need_in, need_out, need_err; + int fdin[2], fdout[2], fderr[2]; + + /* + * In case of errors we must keep the promise to close FDs + * that have been passed in via ->in and ->out. + */ + + need_in = !cmd->no_stdin && cmd->in < 0; + if (need_in) { + if (pipe(fdin) < 0) { + if (cmd->out > 0) + close(cmd->out); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->in = fdin[1]; + } + + need_out = !cmd->no_stdout + && !cmd->stdout_to_stderr + && cmd->out < 0; + if (need_out) { + if (pipe(fdout) < 0) { + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->out = fdout[0]; + } + + need_err = !cmd->no_stderr && cmd->err < 0; + if (need_err) { + if (pipe(fderr) < 0) { + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + if (need_out) + close_pair(fdout); + else if (cmd->out) + close(cmd->out); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->err = fderr[0]; + } + +#ifndef __MINGW32__ + fflush(NULL); + cmd->pid = fork(); + if (!cmd->pid) { + if (cmd->no_stdin) + dup_devnull(0); + else if (need_in) { + dup2(fdin[0], 0); + close_pair(fdin); + } else if (cmd->in) { + dup2(cmd->in, 0); + close(cmd->in); + } + + if (cmd->no_stderr) + dup_devnull(2); + else if (need_err) { + dup2(fderr[1], 2); + close_pair(fderr); + } + + if (cmd->no_stdout) + dup_devnull(1); + else if (cmd->stdout_to_stderr) + dup2(2, 1); + else if (need_out) { + dup2(fdout[1], 1); + close_pair(fdout); + } else if (cmd->out > 1) { + dup2(cmd->out, 1); + close(cmd->out); + } + + if (cmd->dir && chdir(cmd->dir)) + die("exec %s: cd to %s failed (%s)", cmd->argv[0], + cmd->dir, strerror(errno)); + if (cmd->env) { + for (; *cmd->env; cmd->env++) { + if (strchr(*cmd->env, '=')) + putenv((char*)*cmd->env); + else + unsetenv(*cmd->env); + } + } + if (cmd->preexec_cb) + cmd->preexec_cb(); + if (cmd->perf_cmd) { + execv_perf_cmd(cmd->argv); + } else { + execvp(cmd->argv[0], (char *const*) cmd->argv); + } + exit(127); + } +#else + int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ + const char **sargv = cmd->argv; + char **env = environ; + + if (cmd->no_stdin) { + s0 = dup(0); + dup_devnull(0); + } else if (need_in) { + s0 = dup(0); + dup2(fdin[0], 0); + } else if (cmd->in) { + s0 = dup(0); + dup2(cmd->in, 0); + } + + if (cmd->no_stderr) { + s2 = dup(2); + dup_devnull(2); + } else if (need_err) { + s2 = dup(2); + dup2(fderr[1], 2); + } + + if (cmd->no_stdout) { + s1 = dup(1); + dup_devnull(1); + } else if (cmd->stdout_to_stderr) { + s1 = dup(1); + dup2(2, 1); + } else if (need_out) { + s1 = dup(1); + dup2(fdout[1], 1); + } else if (cmd->out > 1) { + s1 = dup(1); + dup2(cmd->out, 1); + } + + if (cmd->dir) + die("chdir in start_command() not implemented"); + if (cmd->env) { + env = copy_environ(); + for (; *cmd->env; cmd->env++) + env = env_setenv(env, *cmd->env); + } + + if (cmd->perf_cmd) { + cmd->argv = prepare_perf_cmd(cmd->argv); + } + + cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); + + if (cmd->env) + free_environ(env); + if (cmd->perf_cmd) + free(cmd->argv); + + cmd->argv = sargv; + if (s0 >= 0) + dup2(s0, 0), close(s0); + if (s1 >= 0) + dup2(s1, 1), close(s1); + if (s2 >= 0) + dup2(s2, 2), close(s2); +#endif + + if (cmd->pid < 0) { + int err = errno; + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + if (need_out) + close_pair(fdout); + else if (cmd->out) + close(cmd->out); + if (need_err) + close_pair(fderr); + return err == ENOENT ? + -ERR_RUN_COMMAND_EXEC : + -ERR_RUN_COMMAND_FORK; + } + + if (need_in) + close(fdin[0]); + else if (cmd->in) + close(cmd->in); + + if (need_out) + close(fdout[1]); + else if (cmd->out) + close(cmd->out); + + if (need_err) + close(fderr[1]); + + return 0; +} + +static int wait_or_whine(pid_t pid) +{ + for (;;) { + int status, code; + pid_t waiting = waitpid(pid, &status, 0); + + if (waiting < 0) { + if (errno == EINTR) + continue; + error("waitpid failed (%s)", strerror(errno)); + return -ERR_RUN_COMMAND_WAITPID; + } + if (waiting != pid) + return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; + if (WIFSIGNALED(status)) + return -ERR_RUN_COMMAND_WAITPID_SIGNAL; + + if (!WIFEXITED(status)) + return -ERR_RUN_COMMAND_WAITPID_NOEXIT; + code = WEXITSTATUS(status); + switch (code) { + case 127: + return -ERR_RUN_COMMAND_EXEC; + case 0: + return 0; + default: + return -code; + } + } +} + +int finish_command(struct child_process *cmd) +{ + return wait_or_whine(cmd->pid); +} + +int run_command(struct child_process *cmd) +{ + int code = start_command(cmd); + if (code) + return code; + return finish_command(cmd); +} + +static void prepare_run_command_v_opt(struct child_process *cmd, + const char **argv, + int opt) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->argv = argv; + cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; + cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; + cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; +} + +int run_command_v_opt(const char **argv, int opt) +{ + struct child_process cmd; + prepare_run_command_v_opt(&cmd, argv, opt); + return run_command(&cmd); +} + +int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env) +{ + struct child_process cmd; + prepare_run_command_v_opt(&cmd, argv, opt); + cmd.dir = dir; + cmd.env = env; + return run_command(&cmd); +} + +#ifdef __MINGW32__ +static __stdcall unsigned run_thread(void *data) +{ + struct async *async = data; + return async->proc(async->fd_for_proc, async->data); +} +#endif + +int start_async(struct async *async) +{ + int pipe_out[2]; + + if (pipe(pipe_out) < 0) + return error("cannot create pipe: %s", strerror(errno)); + async->out = pipe_out[0]; + +#ifndef __MINGW32__ + /* Flush stdio before fork() to avoid cloning buffers */ + fflush(NULL); + + async->pid = fork(); + if (async->pid < 0) { + error("fork (async) failed: %s", strerror(errno)); + close_pair(pipe_out); + return -1; + } + if (!async->pid) { + close(pipe_out[0]); + exit(!!async->proc(pipe_out[1], async->data)); + } + close(pipe_out[1]); +#else + async->fd_for_proc = pipe_out[1]; + async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); + if (!async->tid) { + error("cannot create thread: %s", strerror(errno)); + close_pair(pipe_out); + return -1; + } +#endif + return 0; +} + +int finish_async(struct async *async) +{ +#ifndef __MINGW32__ + int ret = 0; + + if (wait_or_whine(async->pid)) + ret = error("waitpid (async) failed"); +#else + DWORD ret = 0; + if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) + ret = error("waiting for thread failed: %lu", GetLastError()); + else if (!GetExitCodeThread(async->tid, &ret)) + ret = error("cannot get thread exit code: %lu", GetLastError()); + CloseHandle(async->tid); +#endif + return ret; +} + +int run_hook(const char *index_file, const char *name, ...) +{ + struct child_process hook; + const char **argv = NULL, *env[2]; + char index[PATH_MAX]; + va_list args; + int ret; + size_t i = 0, alloc = 0; + + if (access(perf_path("hooks/%s", name), X_OK) < 0) + return 0; + + va_start(args, name); + ALLOC_GROW(argv, i + 1, alloc); + argv[i++] = perf_path("hooks/%s", name); + while (argv[i-1]) { + ALLOC_GROW(argv, i + 1, alloc); + argv[i++] = va_arg(args, const char *); + } + va_end(args); + + memset(&hook, 0, sizeof(hook)); + hook.argv = argv; + hook.no_stdin = 1; + hook.stdout_to_stderr = 1; + if (index_file) { + snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); + env[0] = index; + env[1] = NULL; + hook.env = env; + } + + ret = start_command(&hook); + free(argv); + if (ret) { + warning("Could not spawn %s", argv[0]); + return ret; + } + ret = finish_command(&hook); + if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) + warning("%s exited due to uncaught signal", argv[0]); + + return ret; +} diff --git a/Documentation/perf_counter/run-command.h b/Documentation/perf_counter/run-command.h new file mode 100644 index 0000000..328289f --- /dev/null +++ b/Documentation/perf_counter/run-command.h @@ -0,0 +1,93 @@ +#ifndef RUN_COMMAND_H +#define RUN_COMMAND_H + +enum { + ERR_RUN_COMMAND_FORK = 10000, + ERR_RUN_COMMAND_EXEC, + ERR_RUN_COMMAND_PIPE, + ERR_RUN_COMMAND_WAITPID, + ERR_RUN_COMMAND_WAITPID_WRONG_PID, + ERR_RUN_COMMAND_WAITPID_SIGNAL, + ERR_RUN_COMMAND_WAITPID_NOEXIT, +}; +#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) + +struct child_process { + const char **argv; + pid_t pid; + /* + * Using .in, .out, .err: + * - Specify 0 for no redirections (child inherits stdin, stdout, + * stderr from parent). + * - Specify -1 to have a pipe allocated as follows: + * .in: returns the writable pipe end; parent writes to it, + * the readable pipe end becomes child's stdin + * .out, .err: returns the readable pipe end; parent reads from + * it, the writable pipe end becomes child's stdout/stderr + * The caller of start_command() must close the returned FDs + * after it has completed reading from/writing to it! + * - Specify > 0 to set a channel to a particular FD as follows: + * .in: a readable FD, becomes child's stdin + * .out: a writable FD, becomes child's stdout/stderr + * .err > 0 not supported + * The specified FD is closed by start_command(), even in case + * of errors! + */ + int in; + int out; + int err; + const char *dir; + const char *const *env; + unsigned no_stdin:1; + unsigned no_stdout:1; + unsigned no_stderr:1; + unsigned perf_cmd:1; /* if this is to be perf sub-command */ + unsigned stdout_to_stderr:1; + void (*preexec_cb)(void); +}; + +int start_command(struct child_process *); +int finish_command(struct child_process *); +int run_command(struct child_process *); + +extern int run_hook(const char *index_file, const char *name, ...); + +#define RUN_COMMAND_NO_STDIN 1 +#define RUN_PERF_CMD 2 /*If this is to be perf sub-command */ +#define RUN_COMMAND_STDOUT_TO_STDERR 4 +int run_command_v_opt(const char **argv, int opt); + +/* + * env (the environment) is to be formatted like environ: "VAR=VALUE". + * To unset an environment variable use just "VAR". + */ +int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); + +/* + * The purpose of the following functions is to feed a pipe by running + * a function asynchronously and providing output that the caller reads. + * + * It is expected that no synchronization and mutual exclusion between + * the caller and the feed function is necessary so that the function + * can run in a thread without interfering with the caller. + */ +struct async { + /* + * proc writes to fd and closes it; + * returns 0 on success, non-zero on failure + */ + int (*proc)(int fd, void *data); + void *data; + int out; /* caller reads from here and closes it */ +#ifndef __MINGW32__ + pid_t pid; +#else + HANDLE tid; + int fd_for_proc; +#endif +}; + +int start_async(struct async *async); +int finish_async(struct async *async); + +#endif diff --git a/Documentation/perf_counter/strbuf.c b/Documentation/perf_counter/strbuf.c new file mode 100644 index 0000000..eaba093 --- /dev/null +++ b/Documentation/perf_counter/strbuf.c @@ -0,0 +1,359 @@ +#include "cache.h" + +int prefixcmp(const char *str, const char *prefix) +{ + for (; ; str++, prefix++) + if (!*prefix) + return 0; + else if (*str != *prefix) + return (unsigned char)*prefix - (unsigned char)*str; +} + +/* + * Used as the default ->buf value, so that people can always assume + * buf is non NULL and ->buf is NUL terminated even for a freshly + * initialized strbuf. + */ +char strbuf_slopbuf[1]; + +void strbuf_init(struct strbuf *sb, size_t hint) +{ + sb->alloc = sb->len = 0; + sb->buf = strbuf_slopbuf; + if (hint) + strbuf_grow(sb, hint); +} + +void strbuf_release(struct strbuf *sb) +{ + if (sb->alloc) { + free(sb->buf); + strbuf_init(sb, 0); + } +} + +char *strbuf_detach(struct strbuf *sb, size_t *sz) +{ + char *res = sb->alloc ? sb->buf : NULL; + if (sz) + *sz = sb->len; + strbuf_init(sb, 0); + return res; +} + +void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) +{ + strbuf_release(sb); + sb->buf = buf; + sb->len = len; + sb->alloc = alloc; + strbuf_grow(sb, 0); + sb->buf[sb->len] = '\0'; +} + +void strbuf_grow(struct strbuf *sb, size_t extra) +{ + if (sb->len + extra + 1 <= sb->len) + die("you want to use way too much memory"); + if (!sb->alloc) + sb->buf = NULL; + ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); +} + +void strbuf_trim(struct strbuf *sb) +{ + char *b = sb->buf; + while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) + sb->len--; + while (sb->len > 0 && isspace(*b)) { + b++; + sb->len--; + } + memmove(sb->buf, b, sb->len); + sb->buf[sb->len] = '\0'; +} +void strbuf_rtrim(struct strbuf *sb) +{ + while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) + sb->len--; + sb->buf[sb->len] = '\0'; +} + +void strbuf_ltrim(struct strbuf *sb) +{ + char *b = sb->buf; + while (sb->len > 0 && isspace(*b)) { + b++; + sb->len--; + } + memmove(sb->buf, b, sb->len); + sb->buf[sb->len] = '\0'; +} + +void strbuf_tolower(struct strbuf *sb) +{ + int i; + for (i = 0; i < sb->len; i++) + sb->buf[i] = tolower(sb->buf[i]); +} + +struct strbuf **strbuf_split(const struct strbuf *sb, int delim) +{ + int alloc = 2, pos = 0; + char *n, *p; + struct strbuf **ret; + struct strbuf *t; + + ret = calloc(alloc, sizeof(struct strbuf *)); + p = n = sb->buf; + while (n < sb->buf + sb->len) { + int len; + n = memchr(n, delim, sb->len - (n - sb->buf)); + if (pos + 1 >= alloc) { + alloc = alloc * 2; + ret = realloc(ret, sizeof(struct strbuf *) * alloc); + } + if (!n) + n = sb->buf + sb->len - 1; + len = n - p + 1; + t = malloc(sizeof(struct strbuf)); + strbuf_init(t, len); + strbuf_add(t, p, len); + ret[pos] = t; + ret[++pos] = NULL; + p = ++n; + } + return ret; +} + +void strbuf_list_free(struct strbuf **sbs) +{ + struct strbuf **s = sbs; + + while (*s) { + strbuf_release(*s); + free(*s++); + } + free(sbs); +} + +int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) +{ + int len = a->len < b->len ? a->len: b->len; + int cmp = memcmp(a->buf, b->buf, len); + if (cmp) + return cmp; + return a->len < b->len ? -1: a->len != b->len; +} + +void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, + const void *data, size_t dlen) +{ + if (pos + len < pos) + die("you want to use way too much memory"); + if (pos > sb->len) + die("`pos' is too far after the end of the buffer"); + if (pos + len > sb->len) + die("`pos + len' is too far after the end of the buffer"); + + if (dlen >= len) + strbuf_grow(sb, dlen - len); + memmove(sb->buf + pos + dlen, + sb->buf + pos + len, + sb->len - pos - len); + memcpy(sb->buf + pos, data, dlen); + strbuf_setlen(sb, sb->len + dlen - len); +} + +void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len) +{ + strbuf_splice(sb, pos, 0, data, len); +} + +void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) +{ + strbuf_splice(sb, pos, len, NULL, 0); +} + +void strbuf_add(struct strbuf *sb, const void *data, size_t len) +{ + strbuf_grow(sb, len); + memcpy(sb->buf + sb->len, data, len); + strbuf_setlen(sb, sb->len + len); +} + +void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) +{ + strbuf_grow(sb, len); + memcpy(sb->buf + sb->len, sb->buf + pos, len); + strbuf_setlen(sb, sb->len + len); +} + +void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +{ + int len; + va_list ap; + + if (!strbuf_avail(sb)) + strbuf_grow(sb, 64); + va_start(ap, fmt); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + va_end(ap); + if (len < 0) + die("your vsnprintf is broken"); + if (len > strbuf_avail(sb)) { + strbuf_grow(sb, len); + va_start(ap, fmt); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + va_end(ap); + if (len > strbuf_avail(sb)) { + die("this should not happen, your snprintf is broken"); + } + } + strbuf_setlen(sb, sb->len + len); +} + +void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, + void *context) +{ + for (;;) { + const char *percent; + size_t consumed; + + percent = strchrnul(format, '%'); + strbuf_add(sb, format, percent - format); + if (!*percent) + break; + format = percent + 1; + + consumed = fn(sb, format, context); + if (consumed) + format += consumed; + else + strbuf_addch(sb, '%'); + } +} + +size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, + void *context) +{ + struct strbuf_expand_dict_entry *e = context; + size_t len; + + for (; e->placeholder && (len = strlen(e->placeholder)); e++) { + if (!strncmp(placeholder, e->placeholder, len)) { + if (e->value) + strbuf_addstr(sb, e->value); + return len; + } + } + return 0; +} + +size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) +{ + size_t res; + size_t oldalloc = sb->alloc; + + strbuf_grow(sb, size); + res = fread(sb->buf + sb->len, 1, size, f); + if (res > 0) + strbuf_setlen(sb, sb->len + res); + else if (res < 0 && oldalloc == 0) + strbuf_release(sb); + return res; +} + +ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) +{ + size_t oldlen = sb->len; + size_t oldalloc = sb->alloc; + + strbuf_grow(sb, hint ? hint : 8192); + for (;;) { + ssize_t cnt; + + cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); + if (cnt < 0) { + if (oldalloc == 0) + strbuf_release(sb); + else + strbuf_setlen(sb, oldlen); + return -1; + } + if (!cnt) + break; + sb->len += cnt; + strbuf_grow(sb, 8192); + } + + sb->buf[sb->len] = '\0'; + return sb->len - oldlen; +} + +#define STRBUF_MAXLINK (2*PATH_MAX) + +int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) +{ + size_t oldalloc = sb->alloc; + + if (hint < 32) + hint = 32; + + while (hint < STRBUF_MAXLINK) { + int len; + + strbuf_grow(sb, hint); + len = readlink(path, sb->buf, hint); + if (len < 0) { + if (errno != ERANGE) + break; + } else if (len < hint) { + strbuf_setlen(sb, len); + return 0; + } + + /* .. the buffer was too small - try again */ + hint *= 2; + } + if (oldalloc == 0) + strbuf_release(sb); + return -1; +} + +int strbuf_getline(struct strbuf *sb, FILE *fp, int term) +{ + int ch; + + strbuf_grow(sb, 0); + if (feof(fp)) + return EOF; + + strbuf_reset(sb); + while ((ch = fgetc(fp)) != EOF) { + if (ch == term) + break; + strbuf_grow(sb, 1); + sb->buf[sb->len++] = ch; + } + if (ch == EOF && sb->len == 0) + return EOF; + + sb->buf[sb->len] = '\0'; + return 0; +} + +int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) +{ + int fd, len; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + len = strbuf_read(sb, fd, hint); + close(fd); + if (len < 0) + return -1; + + return len; +} diff --git a/Documentation/perf_counter/strbuf.h b/Documentation/perf_counter/strbuf.h new file mode 100644 index 0000000..9ee908a --- /dev/null +++ b/Documentation/perf_counter/strbuf.h @@ -0,0 +1,137 @@ +#ifndef STRBUF_H +#define STRBUF_H + +/* + * Strbuf's can be use in many ways: as a byte array, or to store arbitrary + * long, overflow safe strings. + * + * Strbufs has some invariants that are very important to keep in mind: + * + * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to + * build complex strings/buffers whose final size isn't easily known. + * + * It is NOT legal to copy the ->buf pointer away. + * `strbuf_detach' is the operation that detachs a buffer from its shell + * while keeping the shell valid wrt its invariants. + * + * 2. the ->buf member is a byte array that has at least ->len + 1 bytes + * allocated. The extra byte is used to store a '\0', allowing the ->buf + * member to be a valid C-string. Every strbuf function ensure this + * invariant is preserved. + * + * Note that it is OK to "play" with the buffer directly if you work it + * that way: + * + * strbuf_grow(sb, SOME_SIZE); + * ... Here, the memory array starting at sb->buf, and of length + * ... strbuf_avail(sb) is all yours, and you are sure that + * ... strbuf_avail(sb) is at least SOME_SIZE. + * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); + * + * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). + * + * Doing so is safe, though if it has to be done in many places, adding the + * missing API to the strbuf module is the way to go. + * + * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 + * even if it's true in the current implementation. Alloc is somehow a + * "private" member that should not be messed with. + */ + +#include + +extern char strbuf_slopbuf[]; +struct strbuf { + size_t alloc; + size_t len; + char *buf; +}; + +#define STRBUF_INIT { 0, 0, strbuf_slopbuf } + +/*----- strbuf life cycle -----*/ +extern void strbuf_init(struct strbuf *, size_t); +extern void strbuf_release(struct strbuf *); +extern char *strbuf_detach(struct strbuf *, size_t *); +extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); +static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { + struct strbuf tmp = *a; + *a = *b; + *b = tmp; +} + +/*----- strbuf size related -----*/ +static inline size_t strbuf_avail(const struct strbuf *sb) { + return sb->alloc ? sb->alloc - sb->len - 1 : 0; +} + +extern void strbuf_grow(struct strbuf *, size_t); + +static inline void strbuf_setlen(struct strbuf *sb, size_t len) { + if (!sb->alloc) + strbuf_grow(sb, 0); + assert(len < sb->alloc); + sb->len = len; + sb->buf[len] = '\0'; +} +#define strbuf_reset(sb) strbuf_setlen(sb, 0) + +/*----- content related -----*/ +extern void strbuf_trim(struct strbuf *); +extern void strbuf_rtrim(struct strbuf *); +extern void strbuf_ltrim(struct strbuf *); +extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); +extern void strbuf_tolower(struct strbuf *); + +extern struct strbuf **strbuf_split(const struct strbuf *, int delim); +extern void strbuf_list_free(struct strbuf **); + +/*----- add data in your buffer -----*/ +static inline void strbuf_addch(struct strbuf *sb, int c) { + strbuf_grow(sb, 1); + sb->buf[sb->len++] = c; + sb->buf[sb->len] = '\0'; +} + +extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); +extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); + +/* splice pos..pos+len with given data */ +extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, + const void *, size_t); + +extern void strbuf_add(struct strbuf *, const void *, size_t); +static inline void strbuf_addstr(struct strbuf *sb, const char *s) { + strbuf_add(sb, s, strlen(s)); +} +static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { + strbuf_add(sb, sb2->buf, sb2->len); +} +extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); + +typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); +extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); +struct strbuf_expand_dict_entry { + const char *placeholder; + const char *value; +}; +extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); + +__attribute__((format(printf,2,3))) +extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); + +extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); +/* XXX: if read fails, any partial read is undone */ +extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); +extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); +extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); + +extern int strbuf_getline(struct strbuf *, FILE *, int); + +extern void stripspace(struct strbuf *buf, int skip_comments); +extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); + +extern int strbuf_branchname(struct strbuf *sb, const char *name); +extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); + +#endif /* STRBUF_H */ diff --git a/Documentation/perf_counter/usage.c b/Documentation/perf_counter/usage.c new file mode 100644 index 0000000..7a10421 --- /dev/null +++ b/Documentation/perf_counter/usage.c @@ -0,0 +1,80 @@ +/* + * GIT - The information manager from hell + * + * Copyright (C) Linus Torvalds, 2005 + */ +#include "util.h" + +static void report(const char *prefix, const char *err, va_list params) +{ + char msg[1024]; + vsnprintf(msg, sizeof(msg), err, params); + fprintf(stderr, "%s%s\n", prefix, msg); +} + +static NORETURN void usage_builtin(const char *err) +{ + fprintf(stderr, "usage: %s\n", err); + exit(129); +} + +static NORETURN void die_builtin(const char *err, va_list params) +{ + report("fatal: ", err, params); + exit(128); +} + +static void error_builtin(const char *err, va_list params) +{ + report("error: ", err, params); +} + +static void warn_builtin(const char *warn, va_list params) +{ + report("warning: ", warn, params); +} + +/* If we are in a dlopen()ed .so write to a global variable would segfault + * (ugh), so keep things static. */ +static void (*usage_routine)(const char *err) NORETURN = usage_builtin; +static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; +static void (*error_routine)(const char *err, va_list params) = error_builtin; +static void (*warn_routine)(const char *err, va_list params) = warn_builtin; + +void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) +{ + die_routine = routine; +} + +void usage(const char *err) +{ + usage_routine(err); +} + +void die(const char *err, ...) +{ + va_list params; + + va_start(params, err); + die_routine(err, params); + va_end(params); +} + +int error(const char *err, ...) +{ + va_list params; + + va_start(params, err); + error_routine(err, params); + va_end(params); + return -1; +} + +void warning(const char *warn, ...) +{ + va_list params; + + va_start(params, warn); + warn_routine(warn, params); + va_end(params); +} diff --git a/Documentation/perf_counter/util.h b/Documentation/perf_counter/util.h new file mode 100644 index 0000000..13f8bdc --- /dev/null +++ b/Documentation/perf_counter/util.h @@ -0,0 +1,394 @@ +#ifndef GIT_COMPAT_UTIL_H +#define GIT_COMPAT_UTIL_H + +#define _FILE_OFFSET_BITS 64 + +#ifndef FLEX_ARRAY +/* + * See if our compiler is known to support flexible array members. + */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +# define FLEX_ARRAY /* empty */ +#elif defined(__GNUC__) +# if (__GNUC__ >= 3) +# define FLEX_ARRAY /* empty */ +# else +# define FLEX_ARRAY 0 /* older GNU extension */ +# endif +#endif + +/* + * Otherwise, default to safer but a bit wasteful traditional style + */ +#ifndef FLEX_ARRAY +# define FLEX_ARRAY 1 +#endif +#endif + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +#ifdef __GNUC__ +#define TYPEOF(x) (__typeof__(x)) +#else +#define TYPEOF(x) +#endif + +#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) +#define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ + +/* Approximation of the length of the decimal representation of this type. */ +#define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) + +#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) +#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ +#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ +#endif +#define _ALL_SOURCE 1 +#define _GNU_SOURCE 1 +#define _BSD_SOURCE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef __MINGW32__ +#include +#include +#include +#include +#ifndef NO_SYS_SELECT_H +#include +#endif +#include +#include +#include +#include +#include +#include +#if defined(__CYGWIN__) +#undef _XOPEN_SOURCE +#include +#define _XOPEN_SOURCE 600 +#include "compat/cygwin.h" +#else +#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ +#include +#define _ALL_SOURCE 1 +#endif +#else /* __MINGW32__ */ +/* pull in Windows compatibility stuff */ +#include "compat/mingw.h" +#endif /* __MINGW32__ */ + +#ifndef NO_ICONV +#include +#endif + +#ifndef NO_OPENSSL +#include +#include +#endif + +/* On most systems would have given us this, but + * not on some systems (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifndef PRIuMAX +#define PRIuMAX "llu" +#endif + +#ifndef PRIu32 +#define PRIu32 "u" +#endif + +#ifndef PRIx32 +#define PRIx32 "x" +#endif + +#ifndef PATH_SEP +#define PATH_SEP ':' +#endif + +#ifndef STRIP_EXTENSION +#define STRIP_EXTENSION "" +#endif + +#ifndef has_dos_drive_prefix +#define has_dos_drive_prefix(path) 0 +#endif + +#ifndef is_dir_sep +#define is_dir_sep(c) ((c) == '/') +#endif + +#ifdef __GNUC__ +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN +#ifndef __attribute__ +#define __attribute__(x) +#endif +#endif + +/* General helper functions */ +extern void usage(const char *err) NORETURN; +extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); +extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); +extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); + +extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); + +extern int prefixcmp(const char *str, const char *prefix); +extern time_t tm_to_time_t(const struct tm *tm); + +static inline const char *skip_prefix(const char *str, const char *prefix) +{ + size_t len = strlen(prefix); + return strncmp(str, prefix, len) ? NULL : str + len; +} + +#if defined(NO_MMAP) || defined(USE_WIN32_MMAP) + +#ifndef PROT_READ +#define PROT_READ 1 +#define PROT_WRITE 2 +#define MAP_PRIVATE 1 +#define MAP_FAILED ((void*)-1) +#endif + +#define mmap git_mmap +#define munmap git_munmap +extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); +extern int git_munmap(void *start, size_t length); + +#else /* NO_MMAP || USE_WIN32_MMAP */ + +#include + +#endif /* NO_MMAP || USE_WIN32_MMAP */ + +#ifdef NO_MMAP + +/* This value must be multiple of (pagesize * 2) */ +#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) + +#else /* NO_MMAP */ + +/* This value must be multiple of (pagesize * 2) */ +#define DEFAULT_PACKED_GIT_WINDOW_SIZE \ + (sizeof(void*) >= 8 \ + ? 1 * 1024 * 1024 * 1024 \ + : 32 * 1024 * 1024) + +#endif /* NO_MMAP */ + +#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT +#define on_disk_bytes(st) ((st).st_size) +#else +#define on_disk_bytes(st) ((st).st_blocks * 512) +#endif + +#define DEFAULT_PACKED_GIT_LIMIT \ + ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) + +#ifdef NO_PREAD +#define pread git_pread +extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); +#endif +/* + * Forward decl that will remind us if its twin in cache.h changes. + * This function is used in compat/pread.c. But we can't include + * cache.h there. + */ +extern ssize_t read_in_full(int fd, void *buf, size_t count); + +#ifdef NO_SETENV +#define setenv gitsetenv +extern int gitsetenv(const char *, const char *, int); +#endif + +#ifdef NO_MKDTEMP +#define mkdtemp gitmkdtemp +extern char *gitmkdtemp(char *); +#endif + +#ifdef NO_UNSETENV +#define unsetenv gitunsetenv +extern void gitunsetenv(const char *); +#endif + +#ifdef NO_STRCASESTR +#define strcasestr gitstrcasestr +extern char *gitstrcasestr(const char *haystack, const char *needle); +#endif + +#ifdef NO_STRLCPY +#define strlcpy gitstrlcpy +extern size_t gitstrlcpy(char *, const char *, size_t); +#endif + +#ifdef NO_STRTOUMAX +#define strtoumax gitstrtoumax +extern uintmax_t gitstrtoumax(const char *, char **, int); +#endif + +#ifdef NO_HSTRERROR +#define hstrerror githstrerror +extern const char *githstrerror(int herror); +#endif + +#ifdef NO_MEMMEM +#define memmem gitmemmem +void *gitmemmem(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); +#endif + +#ifdef FREAD_READS_DIRECTORIES +#ifdef fopen +#undef fopen +#endif +#define fopen(a,b) git_fopen(a,b) +extern FILE *git_fopen(const char*, const char*); +#endif + +#ifdef SNPRINTF_RETURNS_BOGUS +#define snprintf git_snprintf +extern int git_snprintf(char *str, size_t maxsize, + const char *format, ...); +#define vsnprintf git_vsnprintf +extern int git_vsnprintf(char *str, size_t maxsize, + const char *format, va_list ap); +#endif + +#ifdef __GLIBC_PREREQ +#if __GLIBC_PREREQ(2, 1) +#define HAVE_STRCHRNUL +#endif +#endif + +#ifndef HAVE_STRCHRNUL +#define strchrnul gitstrchrnul +static inline char *gitstrchrnul(const char *s, int c) +{ + while (*s && *s != c) + s++; + return (char *)s; +} +#endif + +static inline size_t xsize_t(off_t len) +{ + return (size_t)len; +} + +static inline int has_extension(const char *filename, const char *ext) +{ + size_t len = strlen(filename); + size_t extlen = strlen(ext); + return len > extlen && !memcmp(filename + len - extlen, ext, extlen); +} + +/* Sane ctype - no locale, and works with signed chars */ +#undef isascii +#undef isspace +#undef isdigit +#undef isalpha +#undef isalnum +#undef tolower +#undef toupper +extern unsigned char sane_ctype[256]; +#define GIT_SPACE 0x01 +#define GIT_DIGIT 0x02 +#define GIT_ALPHA 0x04 +#define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 +#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) +#define isascii(x) (((x) & ~0x7f) == 0) +#define isspace(x) sane_istest(x,GIT_SPACE) +#define isdigit(x) sane_istest(x,GIT_DIGIT) +#define isalpha(x) sane_istest(x,GIT_ALPHA) +#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) +#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) +#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) +#define tolower(x) sane_case((unsigned char)(x), 0x20) +#define toupper(x) sane_case((unsigned char)(x), 0) + +static inline int sane_case(int x, int high) +{ + if (sane_istest(x, GIT_ALPHA)) + x = (x & ~0x20) | high; + return x; +} + +static inline int strtoul_ui(char const *s, int base, unsigned int *result) +{ + unsigned long ul; + char *p; + + errno = 0; + ul = strtoul(s, &p, base); + if (errno || *p || p == s || (unsigned int) ul != ul) + return -1; + *result = ul; + return 0; +} + +static inline int strtol_i(char const *s, int base, int *result) +{ + long ul; + char *p; + + errno = 0; + ul = strtol(s, &p, base); + if (errno || *p || p == s || (int) ul != ul) + return -1; + *result = ul; + return 0; +} + +#ifdef INTERNAL_QSORT +void git_qsort(void *base, size_t nmemb, size_t size, + int(*compar)(const void *, const void *)); +#define qsort git_qsort +#endif + +#ifndef DIR_HAS_BSD_GROUP_SEMANTICS +# define FORCE_DIR_SET_GID S_ISGID +#else +# define FORCE_DIR_SET_GID 0 +#endif + +#ifdef NO_NSEC +#undef USE_NSEC +#define ST_CTIME_NSEC(st) 0 +#define ST_MTIME_NSEC(st) 0 +#else +#ifdef USE_ST_TIMESPEC +#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) +#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) +#else +#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) +#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) +#endif +#endif + +#endif diff --git a/Documentation/perf_counter/wrapper.c b/Documentation/perf_counter/wrapper.c new file mode 100644 index 0000000..6350d65 --- /dev/null +++ b/Documentation/perf_counter/wrapper.c @@ -0,0 +1,206 @@ +/* + * Various trivial helper wrappers around standard functions + */ +#include "cache.h" + +/* + * There's no pack memory to release - but stay close to the Git + * version so wrap this away: + */ +static inline void release_pack_memory(size_t size, int flag) +{ +} + +char *xstrdup(const char *str) +{ + char *ret = strdup(str); + if (!ret) { + release_pack_memory(strlen(str) + 1, -1); + ret = strdup(str); + if (!ret) + die("Out of memory, strdup failed"); + } + return ret; +} + +void *xmalloc(size_t size) +{ + void *ret = malloc(size); + if (!ret && !size) + ret = malloc(1); + if (!ret) { + release_pack_memory(size, -1); + ret = malloc(size); + if (!ret && !size) + ret = malloc(1); + if (!ret) + die("Out of memory, malloc failed"); + } +#ifdef XMALLOC_POISON + memset(ret, 0xA5, size); +#endif + return ret; +} + +/* + * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of + * "data" to the allocated memory, zero terminates the allocated memory, + * and returns a pointer to the allocated memory. If the allocation fails, + * the program dies. + */ +void *xmemdupz(const void *data, size_t len) +{ + char *p = xmalloc(len + 1); + memcpy(p, data, len); + p[len] = '\0'; + return p; +} + +char *xstrndup(const char *str, size_t len) +{ + char *p = memchr(str, '\0', len); + return xmemdupz(str, p ? p - str : len); +} + +void *xrealloc(void *ptr, size_t size) +{ + void *ret = realloc(ptr, size); + if (!ret && !size) + ret = realloc(ptr, 1); + if (!ret) { + release_pack_memory(size, -1); + ret = realloc(ptr, size); + if (!ret && !size) + ret = realloc(ptr, 1); + if (!ret) + die("Out of memory, realloc failed"); + } + return ret; +} + +void *xcalloc(size_t nmemb, size_t size) +{ + void *ret = calloc(nmemb, size); + if (!ret && (!nmemb || !size)) + ret = calloc(1, 1); + if (!ret) { + release_pack_memory(nmemb * size, -1); + ret = calloc(nmemb, size); + if (!ret && (!nmemb || !size)) + ret = calloc(1, 1); + if (!ret) + die("Out of memory, calloc failed"); + } + return ret; +} + +void *xmmap(void *start, size_t length, + int prot, int flags, int fd, off_t offset) +{ + void *ret = mmap(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) { + if (!length) + return NULL; + release_pack_memory(length, fd); + ret = mmap(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) + die("Out of memory? mmap failed: %s", strerror(errno)); + } + return ret; +} + +/* + * xread() is the same a read(), but it automatically restarts read() + * operations with a recoverable error (EAGAIN and EINTR). xread() + * DOES NOT GUARANTEE that "len" bytes is read even if the data is available. + */ +ssize_t xread(int fd, void *buf, size_t len) +{ + ssize_t nr; + while (1) { + nr = read(fd, buf, len); + if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) + continue; + return nr; + } +} + +/* + * xwrite() is the same a write(), but it automatically restarts write() + * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT + * GUARANTEE that "len" bytes is written even if the operation is successful. + */ +ssize_t xwrite(int fd, const void *buf, size_t len) +{ + ssize_t nr; + while (1) { + nr = write(fd, buf, len); + if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) + continue; + return nr; + } +} + +ssize_t read_in_full(int fd, void *buf, size_t count) +{ + char *p = buf; + ssize_t total = 0; + + while (count > 0) { + ssize_t loaded = xread(fd, p, count); + if (loaded <= 0) + return total ? total : loaded; + count -= loaded; + p += loaded; + total += loaded; + } + + return total; +} + +ssize_t write_in_full(int fd, const void *buf, size_t count) +{ + const char *p = buf; + ssize_t total = 0; + + while (count > 0) { + ssize_t written = xwrite(fd, p, count); + if (written < 0) + return -1; + if (!written) { + errno = ENOSPC; + return -1; + } + count -= written; + p += written; + total += written; + } + + return total; +} + +int xdup(int fd) +{ + int ret = dup(fd); + if (ret < 0) + die("dup failed: %s", strerror(errno)); + return ret; +} + +FILE *xfdopen(int fd, const char *mode) +{ + FILE *stream = fdopen(fd, mode); + if (stream == NULL) + die("Out of memory? fdopen failed: %s", strerror(errno)); + return stream; +} + +int xmkstemp(char *template) +{ + int fd; + + fd = mkstemp(template); + if (fd < 0) + die("Unable to create temporary file: %s", strerror(errno)); + return fd; +} -- cgit v0.10.2 From 6f06ccbc86f8a02aa32271263249657ce484eb25 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 15:22:22 +0200 Subject: perf_counter tools: clean up after introduction of the Git command framework Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 11809b9..1b60265 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -138,16 +138,6 @@ all:: # # Define NO_PERL if you do not want Perl scripts or libraries at all. # -# Define NO_TCLTK if you do not want Tcl/Tk GUI. -# -# The TCL_PATH variable governs the location of the Tcl interpreter -# used to optimize perf-gui for your system. Only used if NO_TCLTK -# is not set. Defaults to the bare 'tclsh'. -# -# The TCLTK_PATH variable governs the location of the Tcl/Tk interpreter. -# If not set it defaults to the bare 'wish'. If it is set to the empty -# string then NO_TCLTK will be forced (this is used by configure script). -# # Define INTERNAL_QSORT to use Git's implementation of qsort(), which # is a simplified version of the merge sort used in glibc. This is # recommended if Git triggers O(n^2) behavior in your platform's qsort(). @@ -215,12 +205,8 @@ TAR = tar FIND = find INSTALL = install RPMBUILD = rpmbuild -TCL_PATH = tclsh -TCLTK_PATH = wish PTHREAD_LIBS = -lpthread -export TCL_PATH TCLTK_PATH - # sparse is architecture-neutral, which means that we need to tell it # explicitly what architecture to check for. Fix this up for yours.. SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ @@ -529,10 +515,6 @@ ifdef NO_EXTERNAL_GREP BASIC_CFLAGS += -DNO_EXTERNAL_GREP endif -ifeq ($(TCLTK_PATH),) -NO_TCLTK=NoThanks -endif - ifeq ($(PERL_PATH),) NO_PERL=NoThanks endif @@ -583,7 +565,6 @@ prefix_SQ = $(subst ','\'',$(prefix)) SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) -TCLTK_PATH_SQ = $(subst ','\'',$(TCLTK_PATH)) LIBS = $(PERFLIBS) $(EXTLIBS) @@ -607,14 +588,6 @@ ifneq (,$X) endif all:: -ifndef NO_TCLTK - $(QUIET_SUBDIR0)perf-gui $(QUIET_SUBDIR1) perfexecdir='$(perfexec_instdir_SQ)' all - $(QUIET_SUBDIR0)perfk-perf $(QUIET_SUBDIR1) all -endif -ifndef NO_PERL - $(QUIET_SUBDIR0)perl $(QUIET_SUBDIR1) PERL_PATH='$(PERL_PATH_SQ)' prefix='$(prefix_SQ)' all -endif - $(QUIET_SUBDIR0)templates $(QUIET_SUBDIR1) please_set_SHELL_PATH_to_a_more_modern_shell: @$$(:) @@ -704,21 +677,6 @@ builtin-revert.o wt-status.o: wt-status.h $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) -doc: - $(MAKE) -C Documentation all - -man: - $(MAKE) -C Documentation man - -html: - $(MAKE) -C Documentation html - -info: - $(MAKE) -C Documentation info - -pdf: - $(MAKE) -C Documentation pdf - TAGS: $(RM) TAGS $(FIND) . -name '*.[hcS]' -print | xargs etags -a @@ -751,33 +709,12 @@ PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ -### Detect Tck/Tk interpreter path changes -ifndef NO_TCLTK -TRACK_VARS = $(subst ','\'',-DTCLTK_PATH='$(TCLTK_PATH_SQ)') - -PERF-GUI-VARS: .FORCE-PERF-GUI-VARS - @VARS='$(TRACK_VARS)'; \ - if test x"$$VARS" != x"`cat $@ 2>/dev/null`" ; then \ - echo 1>&2 " * new Tcl/Tk interpreter location"; \ - echo "$$VARS" >$@; \ - fi - -.PHONY: .FORCE-PERF-GUI-VARS -endif - ### Testing rules -TEST_PROGRAMS += test-chmtime$X -TEST_PROGRAMS += test-ctype$X -TEST_PROGRAMS += test-date$X -TEST_PROGRAMS += test-delta$X -TEST_PROGRAMS += test-dump-cache-tree$X -TEST_PROGRAMS += test-genrandom$X -TEST_PROGRAMS += test-match-trees$X -TEST_PROGRAMS += test-parse-options$X -TEST_PROGRAMS += test-path-utils$X -TEST_PROGRAMS += test-sha1$X -TEST_PROGRAMS += test-sigchain$X +# +# None right now: +# +# TEST_PROGRAMS += test-something$X all:: $(TEST_PROGRAMS) @@ -787,25 +724,6 @@ all:: $(TEST_PROGRAMS) export NO_SVN_TESTS -test: all - $(MAKE) -C t/ all - -test-ctype$X: ctype.o - -test-date$X: date.o ctype.o - -test-delta$X: diff-delta.o patch-delta.o - -test-parse-options$X: parse-options.o - -.PRECIOUS: $(patsubst test-%$X,test-%.o,$(TEST_PROGRAMS)) - -test-%$X: test-%.o $(PERFLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) - -check-sha1:: test-sha1$X - ./test-sha1.sh - check: common-cmds.h if sparse; \ then \ @@ -845,10 +763,6 @@ install: all $(INSTALL) perf$X perf-upload-pack$X perf-receive-pack$X perf-upload-archive$X perf-shell$X perf-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)' $(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install $(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install -ifndef NO_TCLTK - $(MAKE) -C perfk-perf install - $(MAKE) -C perf-gui perfexecdir='$(perfexec_instdir_SQ)' install -endif ifneq (,$X) $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) endif @@ -865,32 +779,6 @@ endif done } && \ ./check_bindir "z$$bindir" "z$$execdir" "$$bindir/perf-add$X" -install-doc: - $(MAKE) -C Documentation install - -install-man: - $(MAKE) -C Documentation install-man - -install-html: - $(MAKE) -C Documentation install-html - -install-info: - $(MAKE) -C Documentation install-info - -install-pdf: - $(MAKE) -C Documentation install-pdf - -quick-install-doc: - $(MAKE) -C Documentation quick-install - -quick-install-man: - $(MAKE) -C Documentation quick-install-man - -quick-install-html: - $(MAKE) -C Documentation quick-install-html - - - ### Maintainer's dist rules perf.spec: perf.spec.in @@ -904,38 +792,16 @@ dist: perf.spec perf-archive$(X) configure @mkdir -p $(PERF_TARNAME) @cp perf.spec configure $(PERF_TARNAME) @echo $(PERF_VERSION) > $(PERF_TARNAME)/version - @$(MAKE) -C perf-gui TARDIR=../$(PERF_TARNAME)/perf-gui dist-version $(TAR) rf $(PERF_TARNAME).tar \ $(PERF_TARNAME)/perf.spec \ $(PERF_TARNAME)/configure \ - $(PERF_TARNAME)/version \ - $(PERF_TARNAME)/perf-gui/version + $(PERF_TARNAME)/version @$(RM) -r $(PERF_TARNAME) gzip -f -9 $(PERF_TARNAME).tar rpm: dist $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz -htmldocs = perf-htmldocs-$(PERF_VERSION) -manpages = perf-manpages-$(PERF_VERSION) -dist-doc: - $(RM) -r .doc-tmp-dir - mkdir .doc-tmp-dir - $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc - cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar . - gzip -n -9 -f $(htmldocs).tar - : - $(RM) -r .doc-tmp-dir - mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7 - $(MAKE) -C Documentation DESTDIR=./ \ - man1dir=../.doc-tmp-dir/man1 \ - man5dir=../.doc-tmp-dir/man5 \ - man7dir=../.doc-tmp-dir/man7 \ - install - cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar . - gzip -n -9 -f $(manpages).tar - $(RM) -r .doc-tmp-dir - ### Cleaning rules distclean: clean @@ -951,74 +817,13 @@ clean: $(RM) -r $(PERF_TARNAME) .doc-tmp-dir $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz $(RM) $(htmldocs).tar.gz $(manpages).tar.gz - $(MAKE) -C Documentation/ clean - $(MAKE) -C templates/ clean - $(MAKE) -C t/ clean -ifndef NO_TCLTK - $(MAKE) -C perfk-perf clean - $(MAKE) -C perf-gui clean -endif - $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-GUI-VARS PERF-BUILD-OPTIONS + $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS .PHONY: all install clean strip .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS .PHONY: .FORCE-PERF-BUILD-OPTIONS -### Check documentation -# -check-docs:: - @(for v in $(ALL_PROGRAMS) $(BUILT_INS) perf perfk; \ - do \ - case "$$v" in \ - perf-merge-octopus | perf-merge-ours | perf-merge-recursive | \ - perf-merge-resolve | perf-merge-subtree | \ - perf-fsck-objects | perf-init-db | \ - perf-?*--?* ) continue ;; \ - esac ; \ - test -f "Documentation/$$v.txt" || \ - echo "no doc: $$v"; \ - sed -e '/^#/d' command-list.txt | \ - grep -q "^$$v[ ]" || \ - case "$$v" in \ - perf) ;; \ - *) echo "no link: $$v";; \ - esac ; \ - done; \ - ( \ - sed -e '/^#/d' \ - -e 's/[ ].*//' \ - -e 's/^/listed /' command-list.txt; \ - ls -1 Documentation/perf*txt | \ - sed -e 's|Documentation/|documented |' \ - -e 's/\.txt//'; \ - ) | while read how cmd; \ - do \ - case "$$how,$$cmd" in \ - *,perf-citool | \ - *,perf-gui | \ - *,perf-help | \ - documented,perfattributes | \ - documented,perfignore | \ - documented,perfmodules | \ - documented,perfcli | \ - documented,perf-tools | \ - documented,perfcore-tutorial | \ - documented,perfcvs-migration | \ - documented,perfdiffcore | \ - documented,perfglossary | \ - documented,perfhooks | \ - documented,perfrepository-layout | \ - documented,perftutorial | \ - documented,perftutorial-2 | \ - sentinel,not,matching,is,ok ) continue ;; \ - esac; \ - case " $(ALL_PROGRAMS) $(BUILT_INS) perf perfk " in \ - *" $$cmd "*) ;; \ - *) echo "removed but $$how: $$cmd" ;; \ - esac; \ - done ) | sort - ### Make sure built-ins do not have dups and listed in perf.c # check-builtins:: diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c index 125fcc2..a136d61 100644 --- a/Documentation/perf_counter/builtin-help.c +++ b/Documentation/perf_counter/builtin-help.c @@ -417,11 +417,9 @@ static void show_html_page(const char *perf_cmd) int cmd_help(int argc, const char **argv, const char *prefix) { - int nonperf; const char *alias; load_command_list("perf-", &main_cmds, &other_cmds); - /* setup_perf_directory_gently(&nonperf); */ perf_config(perf_help_config, NULL); argc = parse_options(argc, argv, builtin_help_options, diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c index 9d2c769..601bddb 100644 --- a/Documentation/perf_counter/builtin-top.c +++ b/Documentation/perf_counter/builtin-top.c @@ -63,15 +63,6 @@ #include "util.h" -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include @@ -103,8 +94,6 @@ #define PR_TASK_PERF_COUNTERS_DISABLE 31 #define PR_TASK_PERF_COUNTERS_ENABLE 32 -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - #define rdclock() \ ({ \ struct timespec ts; \ @@ -1077,7 +1066,7 @@ static void process_event(uint64_t ip, int counter) record_ip(ip, counter); } -static void process_options(int argc, char *argv[]) +static void process_options(int argc, char **argv) { int error = 0, counter; @@ -1255,7 +1244,7 @@ static void mmap_read(struct mmap_data *md) event_t event_copy; - unsigned int size = event->header.size; + size_t size = event->header.size; /* * Event straddles the mmap boundary -- header should always @@ -1301,7 +1290,7 @@ static void mmap_read(struct mmap_data *md) md->prev = old; } -int cmd_top(int argc, const char **argv, const char *prefix) +int cmd_top(int argc, char **argv, const char *prefix) { struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; diff --git a/Documentation/perf_counter/cache.h b/Documentation/perf_counter/cache.h index dc08564..7108051 100644 --- a/Documentation/perf_counter/cache.h +++ b/Documentation/perf_counter/cache.h @@ -94,4 +94,24 @@ static inline int is_absolute_path(const char *path) { return path[0] == '/'; } + +const char *make_absolute_path(const char *path); +const char *make_nonrelative_path(const char *path); +const char *make_relative_path(const char *abs, const char *base); +int normalize_path_copy(char *dst, const char *src); +int longest_ancestor_length(const char *path, const char *prefix_list); +char *strip_path_suffix(const char *path, const char *suffix); + +extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); + +extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) + __attribute__((format (printf, 3, 4))); +extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...) + __attribute__((format (printf, 3, 4))); +extern char *perf_pathdup(const char *fmt, ...) + __attribute__((format (printf, 1, 2))); + +extern size_t strlcpy(char *dest, const char *src, size_t size); + #endif /* CACHE_H */ diff --git a/Documentation/perf_counter/config.c b/Documentation/perf_counter/config.c index 672d539..3dd13fa 100644 --- a/Documentation/perf_counter/config.c +++ b/Documentation/perf_counter/config.c @@ -15,7 +15,6 @@ static FILE *config_file; static const char *config_file_name; static int config_linenr; static int config_file_eof; -static int zlib_compression_seen; const char *config_exclusive_filename = NULL; @@ -533,14 +532,6 @@ static int store_aux(const char* key, const char* value, void *cb) return 0; } -static int write_error(const char *filename) -{ - error("failed to write new configuration file %s", filename); - - /* Same error code as "failed to rename". */ - return 4; -} - static int store_write_section(int fd, const char* key) { const char *dot; @@ -673,7 +664,7 @@ int perf_config_set_multivar(const char* key, const char* value, { int i, dot; int fd = -1, in_fd; - int ret; + int ret = 0; char* config_filename; const char* last_dot = strrchr(key, '.'); @@ -872,90 +863,6 @@ write_err_out: } -static int section_name_match (const char *buf, const char *name) -{ - int i = 0, j = 0, dot = 0; - for (; buf[i] && buf[i] != ']'; i++) { - if (!dot && isspace(buf[i])) { - dot = 1; - if (name[j++] != '.') - break; - for (i++; isspace(buf[i]); i++) - ; /* do nothing */ - if (buf[i] != '"') - break; - continue; - } - if (buf[i] == '\\' && dot) - i++; - else if (buf[i] == '"' && dot) { - for (i++; isspace(buf[i]); i++) - ; /* do_nothing */ - break; - } - if (buf[i] != name[j++]) - break; - } - return (buf[i] == ']' && name[j] == 0); -} - -/* if new_name == NULL, the section is removed instead */ -int perf_config_rename_section(const char *old_name, const char *new_name) -{ - int ret = 0, remove = 0; - char *config_filename; - int out_fd; - char buf[1024]; - - if (config_exclusive_filename) - config_filename = strdup(config_exclusive_filename); - else - config_filename = perf_pathdup("config"); - if (out_fd < 0) { - ret = error("could not lock config file %s", config_filename); - goto out; - } - - if (!(config_file = fopen(config_filename, "rb"))) { - /* no config file means nothing to rename, no error */ - goto unlock_and_out; - } - - while (fgets(buf, sizeof(buf), config_file)) { - int i; - int length; - for (i = 0; buf[i] && isspace(buf[i]); i++) - ; /* do nothing */ - if (buf[i] == '[') { - /* it's a section */ - if (section_name_match (&buf[i+1], old_name)) { - ret++; - if (new_name == NULL) { - remove = 1; - continue; - } - store.baselen = strlen(new_name); - if (!store_write_section(out_fd, new_name)) { - goto out; - } - continue; - } - remove = 0; - } - if (remove) - continue; - length = strlen(buf); - if (write_in_full(out_fd, buf, length) != length) { - goto out; - } - } - fclose(config_file); - unlock_and_out: - out: - free(config_filename); - return ret; -} - /* * Call this to report error for your variable that should not * get a boolean value (i.e. "[my] var" means "true"). diff --git a/Documentation/perf_counter/path.c b/Documentation/perf_counter/path.c index 891b612..a501a40 100644 --- a/Documentation/perf_counter/path.c +++ b/Documentation/perf_counter/path.c @@ -161,45 +161,6 @@ int perf_mkstemp(char *path, size_t len, const char *template) } -static char *user_path(char *buf, char *path, int sz) -{ - struct passwd *pw; - char *slash; - int len, baselen; - - if (!path || path[0] != '~') - return NULL; - path++; - slash = strchr(path, '/'); - if (path[0] == '/' || !path[0]) { - pw = getpwuid(getuid()); - } - else { - if (slash) { - *slash = 0; - pw = getpwnam(path); - *slash = '/'; - } - else - pw = getpwnam(path); - } - if (!pw || !pw->pw_dir || sz <= strlen(pw->pw_dir)) - return NULL; - baselen = strlen(pw->pw_dir); - memcpy(buf, pw->pw_dir, baselen); - while ((1 < baselen) && (buf[baselen-1] == '/')) { - buf[baselen-1] = 0; - baselen--; - } - if (slash && slash[1]) { - len = strlen(slash); - if (sz <= baselen + len) - return NULL; - memcpy(buf + baselen, slash, len + 1); - } - return buf; -} - const char *make_relative_path(const char *abs, const char *base) { static char buf[PATH_MAX + 1]; diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c index 9256f6a..63f8a89 100644 --- a/Documentation/perf_counter/perf.c +++ b/Documentation/perf_counter/perf.c @@ -1,7 +1,7 @@ #include "builtin.h" #include "exec_cmd.h" #include "cache.h" -//#include "quote.h" +#include "quote.h" #include "run-command.h" const char perf_usage_string[] = @@ -132,7 +132,6 @@ static int handle_alias(int *argcp, const char ***argv) const char** new_argv; const char *alias_command; char *alias_string; - int unused_nonperf; alias_command = (*argv)[0]; alias_string = alias_lookup(alias_command); diff --git a/Documentation/perf_counter/util.h b/Documentation/perf_counter/util.h index 13f8bdc..36e40c3 100644 --- a/Documentation/perf_counter/util.h +++ b/Documentation/perf_counter/util.h @@ -295,6 +295,20 @@ static inline char *gitstrchrnul(const char *s, int c) } #endif +/* + * Wrappers: + */ +extern char *xstrdup(const char *str); +extern void *xmalloc(size_t size); +extern void *xmemdupz(const void *data, size_t len); +extern char *xstrndup(const char *str, size_t len); +extern void *xrealloc(void *ptr, size_t size); +extern void *xcalloc(size_t nmemb, size_t size); +extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); +extern ssize_t xread(int fd, void *buf, size_t len); +extern ssize_t xwrite(int fd, const void *buf, size_t len); +extern int xdup(int fd); +extern FILE *xfdopen(int fd, const char *mode); static inline size_t xsize_t(off_t len) { return (size_t)len; -- cgit v0.10.2 From ddcacfa0febff6454dba6cea1931f3020a9f6c24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 15:37:32 +0200 Subject: perf_counter tools: separate kerneltop into 'perf top' and 'perf stat' Lets use the Git framework of built-in commands. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 1b60265..fb8b717 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -309,6 +309,7 @@ LIB_OBJS += usage.o LIB_OBJS += wrapper.o BUILTIN_OBJS += builtin-help.o +BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c new file mode 100644 index 0000000..169a2d1 --- /dev/null +++ b/Documentation/perf_counter/builtin-stat.c @@ -0,0 +1,592 @@ +/* + * kerneltop.c: show top kernel functions - performance counters showcase + + Build with: + + cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt + + Sample output: + +------------------------------------------------------------------------------ + KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) +------------------------------------------------------------------------------ + + weight RIP kernel function + ______ ________________ _______________ + + 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev + 33.00 - ffffffff804cb740 : sock_alloc_send_skb + 31.26 - ffffffff804ce808 : skb_push + 22.43 - ffffffff80510004 : tcp_established_options + 19.00 - ffffffff8027d250 : find_get_page + 15.76 - ffffffff804e4fc9 : eth_type_trans + 15.20 - ffffffff804d8baa : dst_release + 14.86 - ffffffff804cf5d8 : skb_release_head_state + 14.00 - ffffffff802217d5 : read_hpet + 12.00 - ffffffff804ffb7f : __ip_local_out + 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish + 8.54 - ffffffff805001a3 : ip_queue_xmit + */ + +/* + * perfstat: /usr/bin/time -alike performance counter statistics utility + + It summarizes the counter events of all tasks (and child tasks), + covering all CPUs that the command (or workload) executes on. + It only counts the per-task events of the workload started, + independent of how many other tasks run on those CPUs. + + Sample output: + + $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null + + Performance counter stats for 'ls': + + 163516953 instructions + 2295 cache-misses + 2855182 branch-misses + */ + + /* + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar + * + * Improvements and fixes by: + * + * Arjan van de Ven + * Yanmin Zhang + * Wu Fengguang + * Mike Galbraith + * Paul Mackerras + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "util.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../include/linux/perf_counter.h" + + +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +#ifdef __x86_64__ +#define __NR_perf_counter_open 295 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __i386__ +#define __NR_perf_counter_open 333 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#define rmb() asm volatile ("sync" ::: "memory") +#define cpu_relax() asm volatile ("" ::: "memory"); +#endif + +#define unlikely(x) __builtin_expect(!!(x), 0) +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +extern asmlinkage int sys_perf_counter_open( + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags); + +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) + +static int system_wide = 0; + +static int nr_counters = 0; +static __u64 event_id[MAX_COUNTERS] = { + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), + EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), + + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), + EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), +}; +static int default_interval = 100000; +static int event_count[MAX_COUNTERS]; +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static int tid = -1; +static int profile_cpu = -1; +static int nr_cpus = 0; +static int nmi = 1; +static int group = 0; +static unsigned int page_size; + +static int zero; + +static int scale; + +static const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +static char *hw_event_names[] = { + "CPU cycles", + "instructions", + "cache references", + "cache misses", + "branches", + "branch misses", + "bus cycles", +}; + +static char *sw_event_names[] = { + "cpu clock ticks", + "task clock ticks", + "pagefaults", + "context switches", + "CPU migrations", + "minor faults", + "major faults", +}; + +struct event_symbol { + __u64 event; + char *symbol; +}; + +static struct event_symbol event_symbols[] = { + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, + + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, +}; + +#define __PERF_COUNTER_FIELD(config, name) \ + ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) +#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) +#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) +#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) + +static void display_events_help(void) +{ + unsigned int i; + __u64 e; + + printf( + " -e EVENT --event=EVENT # symbolic-name abbreviations"); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + int type, id; + + e = event_symbols[i].event; + type = PERF_COUNTER_TYPE(e); + id = PERF_COUNTER_ID(e); + + printf("\n %d:%d: %-20s", + type, id, event_symbols[i].symbol); + } + + printf("\n" + " rNNN: raw PMU events (eventsel+umask)\n\n"); +} + +static void display_help(void) +{ + printf( + "Usage: perfstat [] \n\n" + "PerfStat Options (up to %d event types can be specified):\n\n", + MAX_COUNTERS); + + display_events_help(); + + printf( + " -l # scale counter values\n" + " -a # system-wide collection\n"); + exit(0); +} + +static char *event_name(int ctr) +{ + __u64 config = event_id[ctr]; + int type = PERF_COUNTER_TYPE(config); + int id = PERF_COUNTER_ID(config); + static char buf[32]; + + if (PERF_COUNTER_RAW(config)) { + sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); + return buf; + } + + switch (type) { + case PERF_TYPE_HARDWARE: + if (id < PERF_HW_EVENTS_MAX) + return hw_event_names[id]; + return "unknown-hardware"; + + case PERF_TYPE_SOFTWARE: + if (id < PERF_SW_EVENTS_MAX) + return sw_event_names[id]; + return "unknown-software"; + + default: + break; + } + + return "unknown"; +} + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static __u64 match_event_symbols(char *str) +{ + __u64 config, id; + int type; + unsigned int i; + + if (sscanf(str, "r%llx", &config) == 1) + return config | PERF_COUNTER_RAW_MASK; + + if (sscanf(str, "%d:%llu", &type, &id) == 2) + return EID(type, id); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return ~0ULL; +} + +static int parse_events(char *str) +{ + __u64 config; + +again: + if (nr_counters == MAX_COUNTERS) + return -1; + + config = match_event_symbols(str); + if (config == ~0ULL) + return -1; + + event_id[nr_counters] = config; + nr_counters++; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } + + return 0; +} + + +/* + * perfstat + */ + +char fault_here[1000000]; + +static void create_perfstat_counter(int counter) +{ + struct perf_counter_hw_event hw_event; + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.config = event_id[counter]; + hw_event.record_type = 0; + hw_event.nmi = 0; + if (scale) + hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[cpu][counter], strerror(errno)); + exit(-1); + } + } + } else { + hw_event.inherit = 1; + hw_event.disabled = 1; + + fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); + if (fd[0][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[0][counter], strerror(errno)); + exit(-1); + } + } +} + +int do_perfstat(int argc, char *argv[]) +{ + unsigned long long t0, t1; + int counter; + ssize_t res; + int status; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perfstat_counter(counter); + + argc -= optind; + argv += optind; + + if (!argc) + display_help(); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + if (!pid) { + if (execvp(argv[0], argv)) { + perror(argv[0]); + exit(-1); + } + } + while (wait(&status) >= 0) + ; + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s\':\n", + argv[0]); + fprintf(stderr, "\n"); + + for (counter = 0; counter < nr_counters; counter++) { + int cpu, nv; + __u64 count[3], single_count[3]; + int scaled; + + count[0] = count[1] = count[2] = 0; + nv = scale ? 3 : 1; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + res = read(fd[cpu][counter], + single_count, nv * sizeof(__u64)); + assert(res == nv * sizeof(__u64)); + + count[0] += single_count[0]; + if (scale) { + count[1] += single_count[1]; + count[2] += single_count[2]; + } + } + + scaled = 0; + if (scale) { + if (count[2] == 0) { + fprintf(stderr, " %14s %-20s\n", + "", event_name(counter)); + continue; + } + if (count[2] < count[1]) { + scaled = 1; + count[0] = (unsigned long long) + ((double)count[0] * count[1] / count[2] + 0.5); + } + } + + if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || + event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { + + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s (msecs)", + msecs, event_name(counter)); + } else { + fprintf(stderr, " %14Ld %-20s (events)", + count[0], event_name(counter)); + } + if (scaled) + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); + } + fprintf(stderr, "\n"); + fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", + (double)(t1-t0)/1e6); + fprintf(stderr, "\n"); + + return 0; +} + +static void process_options(int argc, char **argv) +{ + int error = 0, counter; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"count", required_argument, NULL, 'c'}, + {"cpu", required_argument, NULL, 'C'}, + {"delay", required_argument, NULL, 'd'}, + {"dump_symtab", no_argument, NULL, 'D'}, + {"event", required_argument, NULL, 'e'}, + {"filter", required_argument, NULL, 'f'}, + {"group", required_argument, NULL, 'g'}, + {"help", no_argument, NULL, 'h'}, + {"nmi", required_argument, NULL, 'n'}, + {"munmap_info", no_argument, NULL, 'U'}, + {"pid", required_argument, NULL, 'p'}, + {"realtime", required_argument, NULL, 'r'}, + {"scale", no_argument, NULL, 'l'}, + {"symbol", required_argument, NULL, 's'}, + {"stat", no_argument, NULL, 'S'}, + {"vmlinux", required_argument, NULL, 'x'}, + {"zero", no_argument, NULL, 'z'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'a': system_wide = 1; break; + case 'c': default_interval = atoi(optarg); break; + case 'C': + /* CPU and PID are mutually exclusive */ + if (tid != -1) { + printf("WARNING: CPU switch overriding PID\n"); + sleep(1); + tid = -1; + } + profile_cpu = atoi(optarg); break; + + case 'e': error = parse_events(optarg); break; + + case 'g': group = atoi(optarg); break; + case 'h': display_help(); break; + case 'l': scale = 1; break; + case 'n': nmi = atoi(optarg); break; + case 'p': + /* CPU and PID are mutually exclusive */ + if (profile_cpu != -1) { + printf("WARNING: PID switch overriding CPU\n"); + sleep(1); + profile_cpu = -1; + } + tid = atoi(optarg); break; + case 'z': zero = 1; break; + default: error = 1; break; + } + } + if (error) + display_help(); + + if (!nr_counters) { + nr_counters = 8; + } + + for (counter = 0; counter < nr_counters; counter++) { + if (event_count[counter]) + continue; + + event_count[counter] = default_interval; + } +} + +int cmd_stat(int argc, char **argv, const char *prefix) +{ + page_size = sysconf(_SC_PAGE_SIZE); + + process_options(argc, argv); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + return do_perfstat(argc, argv); +} diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c index 601bddb..98e8690 100644 --- a/Documentation/perf_counter/builtin-top.c +++ b/Documentation/perf_counter/builtin-top.c @@ -28,25 +28,6 @@ 8.54 - ffffffff805001a3 : ip_queue_xmit */ -/* - * perfstat: /usr/bin/time -alike performance counter statistics utility - - It summarizes the counter events of all tasks (and child tasks), - covering all CPUs that the command (or workload) executes on. - It only counts the per-task events of the workload started, - independent of how many other tasks run on those CPUs. - - Sample output: - - $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null - - Performance counter stats for 'ls': - - 163516953 instructions - 2295 cache-misses - 2855182 branch-misses - */ - /* * Copyright (C) 2008, Red Hat Inc, Ingo Molnar * @@ -149,7 +130,6 @@ asmlinkage int sys_perf_counter_open( #define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) -static int run_perfstat = 0; static int system_wide = 0; static int nr_counters = 0; @@ -203,7 +183,7 @@ struct source_line { static struct source_line *lines; static struct source_line **lines_tail; -const unsigned int default_count[] = { +static const unsigned int default_count[] = { 1000000, 1000000, 10000, @@ -291,26 +271,8 @@ static void display_events_help(void) " rNNN: raw PMU events (eventsel+umask)\n\n"); } -static void display_perfstat_help(void) -{ - printf( - "Usage: perfstat [] \n\n" - "PerfStat Options (up to %d event types can be specified):\n\n", - MAX_COUNTERS); - - display_events_help(); - - printf( - " -l # scale counter values\n" - " -a # system-wide collection\n"); - exit(0); -} - static void display_help(void) { - if (run_perfstat) - return display_perfstat_help(); - printf( "Usage: kerneltop []\n" " Or: kerneltop -S [] COMMAND [ARGS]\n\n" @@ -320,8 +282,6 @@ static void display_help(void) display_events_help(); printf( - " -S --stat # perfstat COMMAND\n" - " -a # system-wide collection (for perfstat)\n\n" " -c CNT --count=CNT # event period to sample\n\n" " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" @@ -420,151 +380,6 @@ again: return 0; } - -/* - * perfstat - */ - -char fault_here[1000000]; - -static void create_perfstat_counter(int counter) -{ - struct perf_counter_hw_event hw_event; - - memset(&hw_event, 0, sizeof(hw_event)); - hw_event.config = event_id[counter]; - hw_event.record_type = 0; - hw_event.nmi = 0; - if (scale) - hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - - if (system_wide) { - int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[cpu][counter], strerror(errno)); - exit(-1); - } - } - } else { - hw_event.inherit = 1; - hw_event.disabled = 1; - - fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); - if (fd[0][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[0][counter], strerror(errno)); - exit(-1); - } - } -} - -int do_perfstat(int argc, char *argv[]) -{ - unsigned long long t0, t1; - int counter; - ssize_t res; - int status; - int pid; - - if (!system_wide) - nr_cpus = 1; - - for (counter = 0; counter < nr_counters; counter++) - create_perfstat_counter(counter); - - argc -= optind; - argv += optind; - - if (!argc) - display_help(); - - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); - - if ((pid = fork()) < 0) - perror("failed to fork"); - if (!pid) { - if (execvp(argv[0], argv)) { - perror(argv[0]); - exit(-1); - } - } - while (wait(&status) >= 0) - ; - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - t1 = rdclock(); - - fflush(stdout); - - fprintf(stderr, "\n"); - fprintf(stderr, " Performance counter stats for \'%s\':\n", - argv[0]); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) { - int cpu, nv; - __u64 count[3], single_count[3]; - int scaled; - - count[0] = count[1] = count[2] = 0; - nv = scale ? 3 : 1; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - res = read(fd[cpu][counter], - single_count, nv * sizeof(__u64)); - assert(res == nv * sizeof(__u64)); - - count[0] += single_count[0]; - if (scale) { - count[1] += single_count[1]; - count[2] += single_count[2]; - } - } - - scaled = 0; - if (scale) { - if (count[2] == 0) { - fprintf(stderr, " %14s %-20s\n", - "", event_name(counter)); - continue; - } - if (count[2] < count[1]) { - scaled = 1; - count[0] = (unsigned long long) - ((double)count[0] * count[1] / count[2] + 0.5); - } - } - - if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || - event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { - - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s (msecs)", - msecs, event_name(counter)); - } else { - fprintf(stderr, " %14Ld %-20s (events)", - count[0], event_name(counter)); - } - if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); - fprintf(stderr, "\n"); - } - fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); - fprintf(stderr, "\n"); - - return 0; -} - /* * Symbols */ @@ -805,7 +620,7 @@ static int read_symbol(FILE *in, struct sym_entry *s) return 0; } -int compare_addr(const void *__sym1, const void *__sym2) +static int compare_addr(const void *__sym1, const void *__sym2) { const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; @@ -1070,9 +885,6 @@ static void process_options(int argc, char **argv) { int error = 0, counter; - if (strstr(argv[0], "perfstat")) - run_perfstat = 1; - for (;;) { int option_index = 0; /** Options for getopt */ @@ -1134,7 +946,6 @@ static void process_options(int argc, char **argv) tid = atoi(optarg); break; case 'r': realtime_prio = atoi(optarg); break; case 's': sym_filter = strdup(optarg); break; - case 'S': run_perfstat = 1; break; case 'x': vmlinux = strdup(optarg); break; case 'z': zero = 1; break; case 'm': mmap_pages = atoi(optarg); break; @@ -1147,12 +958,8 @@ static void process_options(int argc, char **argv) display_help(); if (!nr_counters) { - if (run_perfstat) - nr_counters = 8; - else { - nr_counters = 1; - event_id[0] = 0; - } + nr_counters = 1; + event_id[0] = 0; } for (counter = 0; counter < nr_counters; counter++) { @@ -1308,9 +1115,6 @@ int cmd_top(int argc, char **argv, const char *prefix) assert(nr_cpus <= MAX_NR_CPUS); assert(nr_cpus >= 0); - if (run_perfstat) - return do_perfstat(argc, argv); - if (tid != -1 || profile_cpu != -1) nr_cpus = 1; diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h index 4163744..a3bb6cd 100644 --- a/Documentation/perf_counter/builtin.h +++ b/Documentation/perf_counter/builtin.h @@ -14,5 +14,6 @@ extern void prune_packed_objects(int); extern int read_line_with_nul(char *buf, int size, FILE *file); extern int check_pager_config(const char *cmd); -extern int cmd_top(int argc, const char **argv, const char *prefix); +extern int cmd_top(int argc, char **argv, const char *prefix); +extern int cmd_stat(int argc, char **argv, const char *prefix); #endif diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt index 1eab365..52455d4 100644 --- a/Documentation/perf_counter/command-list.txt +++ b/Documentation/perf_counter/command-list.txt @@ -1,4 +1,5 @@ # List of known perf commands. # command name category [deprecated] [common] perf-top mainporcelain common +perf-stat mainporcelain common diff --git a/Documentation/perf_counter/kerneltop.c b/Documentation/perf_counter/kerneltop.c deleted file mode 100644 index 042c1b8..0000000 --- a/Documentation/perf_counter/kerneltop.c +++ /dev/null @@ -1,1409 +0,0 @@ -/* - * kerneltop.c: show top kernel functions - performance counters showcase - - Build with: - - cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt - - Sample output: - ------------------------------------------------------------------------------- - KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) ------------------------------------------------------------------------------- - - weight RIP kernel function - ______ ________________ _______________ - - 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev - 33.00 - ffffffff804cb740 : sock_alloc_send_skb - 31.26 - ffffffff804ce808 : skb_push - 22.43 - ffffffff80510004 : tcp_established_options - 19.00 - ffffffff8027d250 : find_get_page - 15.76 - ffffffff804e4fc9 : eth_type_trans - 15.20 - ffffffff804d8baa : dst_release - 14.86 - ffffffff804cf5d8 : skb_release_head_state - 14.00 - ffffffff802217d5 : read_hpet - 12.00 - ffffffff804ffb7f : __ip_local_out - 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish - 8.54 - ffffffff805001a3 : ip_queue_xmit - */ - -/* - * perfstat: /usr/bin/time -alike performance counter statistics utility - - It summarizes the counter events of all tasks (and child tasks), - covering all CPUs that the command (or workload) executes on. - It only counts the per-task events of the workload started, - independent of how many other tasks run on those CPUs. - - Sample output: - - $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null - - Performance counter stats for 'ls': - - 163516953 instructions - 2295 cache-misses - 2855182 branch-misses - */ - - /* - * Copyright (C) 2008, Red Hat Inc, Ingo Molnar - * - * Improvements and fixes by: - * - * Arjan van de Ven - * Yanmin Zhang - * Wu Fengguang - * Mike Galbraith - * Paul Mackerras - * - * Released under the GPL v2. (and only v2, not any later version) - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "../../include/linux/perf_counter.h" - - -/* - * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all - * counters in the current task. - */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define rdclock() \ -({ \ - struct timespec ts; \ - \ - clock_gettime(CLOCK_MONOTONIC, &ts); \ - ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ -}) - -/* - * Pick up some kernel type conventions: - */ -#define __user -#define asmlinkage - -#ifdef __x86_64__ -#define __NR_perf_counter_open 295 -#define rmb() asm volatile("lfence" ::: "memory") -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); -#endif - -#ifdef __i386__ -#define __NR_perf_counter_open 333 -#define rmb() asm volatile("lfence" ::: "memory") -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); -#endif - -#ifdef __powerpc__ -#define __NR_perf_counter_open 319 -#define rmb() asm volatile ("sync" ::: "memory") -#define cpu_relax() asm volatile ("" ::: "memory"); -#endif - -#define unlikely(x) __builtin_expect(!!(x), 0) -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) - -asmlinkage int sys_perf_counter_open( - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd, - unsigned long flags) -{ - return syscall( - __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); -} - -#define MAX_COUNTERS 64 -#define MAX_NR_CPUS 256 - -#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) - -static int run_perfstat = 0; -static int system_wide = 0; - -static int nr_counters = 0; -static __u64 event_id[MAX_COUNTERS] = { - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), - EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), - - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), - EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), -}; -static int default_interval = 100000; -static int event_count[MAX_COUNTERS]; -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; - -static __u64 count_filter = 100; - -static int tid = -1; -static int profile_cpu = -1; -static int nr_cpus = 0; -static int nmi = 1; -static unsigned int realtime_prio = 0; -static int group = 0; -static unsigned int page_size; -static unsigned int mmap_pages = 16; -static int use_mmap = 0; -static int use_munmap = 0; - -static char *vmlinux; - -static char *sym_filter; -static unsigned long filter_start; -static unsigned long filter_end; - -static int delay_secs = 2; -static int zero; -static int dump_symtab; - -static int scale; - -struct source_line { - uint64_t EIP; - unsigned long count; - char *line; - struct source_line *next; -}; - -static struct source_line *lines; -static struct source_line **lines_tail; - -const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - -static char *hw_event_names[] = { - "CPU cycles", - "instructions", - "cache references", - "cache misses", - "branches", - "branch misses", - "bus cycles", -}; - -static char *sw_event_names[] = { - "cpu clock ticks", - "task clock ticks", - "pagefaults", - "context switches", - "CPU migrations", - "minor faults", - "major faults", -}; - -struct event_symbol { - __u64 event; - char *symbol; -}; - -static struct event_symbol event_symbols[] = { - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, - - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, -}; - -#define __PERF_COUNTER_FIELD(config, name) \ - ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) - -#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) -#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) -#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) -#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) - -static void display_events_help(void) -{ - unsigned int i; - __u64 e; - - printf( - " -e EVENT --event=EVENT # symbolic-name abbreviations"); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - int type, id; - - e = event_symbols[i].event; - type = PERF_COUNTER_TYPE(e); - id = PERF_COUNTER_ID(e); - - printf("\n %d:%d: %-20s", - type, id, event_symbols[i].symbol); - } - - printf("\n" - " rNNN: raw PMU events (eventsel+umask)\n\n"); -} - -static void display_perfstat_help(void) -{ - printf( - "Usage: perfstat [] \n\n" - "PerfStat Options (up to %d event types can be specified):\n\n", - MAX_COUNTERS); - - display_events_help(); - - printf( - " -l # scale counter values\n" - " -a # system-wide collection\n"); - exit(0); -} - -static void display_help(void) -{ - if (run_perfstat) - return display_perfstat_help(); - - printf( - "Usage: kerneltop []\n" - " Or: kerneltop -S [] COMMAND [ARGS]\n\n" - "KernelTop Options (up to %d event types can be specified at once):\n\n", - MAX_COUNTERS); - - display_events_help(); - - printf( - " -S --stat # perfstat COMMAND\n" - " -a # system-wide collection (for perfstat)\n\n" - " -c CNT --count=CNT # event period to sample\n\n" - " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" - " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" - " -l # show scale factor for RR events\n" - " -d delay --delay= # sampling/display delay [default: 2]\n" - " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" - " -r prio --realtime= # event acquisition runs with SCHED_FIFO policy\n" - " -s symbol --symbol= # function to be showed annotated one-shot\n" - " -x path --vmlinux= # the vmlinux binary, required for -s use\n" - " -z --zero # zero counts after display\n" - " -D --dump_symtab # dump symbol table to stderr on startup\n" - " -m pages --mmap_pages= # number of mmap data pages\n" - " -M --mmap_info # print mmap info stream\n" - " -U --munmap_info # print munmap info stream\n" - ); - - exit(0); -} - -static char *event_name(int ctr) -{ - __u64 config = event_id[ctr]; - int type = PERF_COUNTER_TYPE(config); - int id = PERF_COUNTER_ID(config); - static char buf[32]; - - if (PERF_COUNTER_RAW(config)) { - sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); - return buf; - } - - switch (type) { - case PERF_TYPE_HARDWARE: - if (id < PERF_HW_EVENTS_MAX) - return hw_event_names[id]; - return "unknown-hardware"; - - case PERF_TYPE_SOFTWARE: - if (id < PERF_SW_EVENTS_MAX) - return sw_event_names[id]; - return "unknown-software"; - - default: - break; - } - - return "unknown"; -} - -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static __u64 match_event_symbols(char *str) -{ - __u64 config, id; - int type; - unsigned int i; - - if (sscanf(str, "r%llx", &config) == 1) - return config | PERF_COUNTER_RAW_MASK; - - if (sscanf(str, "%d:%llu", &type, &id) == 2) - return EID(type, id); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return event_symbols[i].event; - } - - return ~0ULL; -} - -static int parse_events(char *str) -{ - __u64 config; - -again: - if (nr_counters == MAX_COUNTERS) - return -1; - - config = match_event_symbols(str); - if (config == ~0ULL) - return -1; - - event_id[nr_counters] = config; - nr_counters++; - - str = strstr(str, ","); - if (str) { - str++; - goto again; - } - - return 0; -} - - -/* - * perfstat - */ - -char fault_here[1000000]; - -static void create_perfstat_counter(int counter) -{ - struct perf_counter_hw_event hw_event; - - memset(&hw_event, 0, sizeof(hw_event)); - hw_event.config = event_id[counter]; - hw_event.record_type = 0; - hw_event.nmi = 0; - if (scale) - hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - - if (system_wide) { - int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[cpu][counter], strerror(errno)); - exit(-1); - } - } - } else { - hw_event.inherit = 1; - hw_event.disabled = 1; - - fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); - if (fd[0][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[0][counter], strerror(errno)); - exit(-1); - } - } -} - -int do_perfstat(int argc, char *argv[]) -{ - unsigned long long t0, t1; - int counter; - ssize_t res; - int status; - int pid; - - if (!system_wide) - nr_cpus = 1; - - for (counter = 0; counter < nr_counters; counter++) - create_perfstat_counter(counter); - - argc -= optind; - argv += optind; - - if (!argc) - display_help(); - - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); - - if ((pid = fork()) < 0) - perror("failed to fork"); - if (!pid) { - if (execvp(argv[0], argv)) { - perror(argv[0]); - exit(-1); - } - } - while (wait(&status) >= 0) - ; - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - t1 = rdclock(); - - fflush(stdout); - - fprintf(stderr, "\n"); - fprintf(stderr, " Performance counter stats for \'%s\':\n", - argv[0]); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) { - int cpu, nv; - __u64 count[3], single_count[3]; - int scaled; - - count[0] = count[1] = count[2] = 0; - nv = scale ? 3 : 1; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - res = read(fd[cpu][counter], - single_count, nv * sizeof(__u64)); - assert(res == nv * sizeof(__u64)); - - count[0] += single_count[0]; - if (scale) { - count[1] += single_count[1]; - count[2] += single_count[2]; - } - } - - scaled = 0; - if (scale) { - if (count[2] == 0) { - fprintf(stderr, " %14s %-20s\n", - "", event_name(counter)); - continue; - } - if (count[2] < count[1]) { - scaled = 1; - count[0] = (unsigned long long) - ((double)count[0] * count[1] / count[2] + 0.5); - } - } - - if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || - event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { - - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s (msecs)", - msecs, event_name(counter)); - } else { - fprintf(stderr, " %14Ld %-20s (events)", - count[0], event_name(counter)); - } - if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); - fprintf(stderr, "\n"); - } - fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); - fprintf(stderr, "\n"); - - return 0; -} - -/* - * Symbols - */ - -static uint64_t min_ip; -static uint64_t max_ip = -1ll; - -struct sym_entry { - unsigned long long addr; - char *sym; - unsigned long count[MAX_COUNTERS]; - int skip; - struct source_line *source; -}; - -#define MAX_SYMS 100000 - -static int sym_table_count; - -struct sym_entry *sym_filter_entry; - -static struct sym_entry sym_table[MAX_SYMS]; - -static void show_details(struct sym_entry *sym); - -/* - * Ordering weight: count-1 * count-2 * ... / count-n - */ -static double sym_weight(const struct sym_entry *sym) -{ - double weight; - int counter; - - weight = sym->count[0]; - - for (counter = 1; counter < nr_counters-1; counter++) - weight *= sym->count[counter]; - - weight /= (sym->count[counter] + 1); - - return weight; -} - -static int compare(const void *__sym1, const void *__sym2) -{ - const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; - - return sym_weight(sym1) < sym_weight(sym2); -} - -static long events; -static long userspace_events; -static const char CONSOLE_CLEAR[] = ""; - -static struct sym_entry tmp[MAX_SYMS]; - -static void print_sym_table(void) -{ - int i, printed; - int counter; - float events_per_sec = events/delay_secs; - float kevents_per_sec = (events-userspace_events)/delay_secs; - float sum_kevents = 0.0; - - events = userspace_events = 0; - memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); - qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); - - for (i = 0; i < sym_table_count && tmp[i].count[0]; i++) - sum_kevents += tmp[i].count[0]; - - write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); - - printf( -"------------------------------------------------------------------------------\n"); - printf( " KernelTop:%8.0f irqs/sec kernel:%4.1f%% [%s, ", - events_per_sec, - 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), - nmi ? "NMI" : "IRQ"); - - if (nr_counters == 1) - printf("%d ", event_count[0]); - - for (counter = 0; counter < nr_counters; counter++) { - if (counter) - printf("/"); - - printf("%s", event_name(counter)); - } - - printf( "], "); - - if (tid != -1) - printf(" (tid: %d", tid); - else - printf(" (all"); - - if (profile_cpu != -1) - printf(", cpu: %d)\n", profile_cpu); - else { - if (tid != -1) - printf(")\n"); - else - printf(", %d CPUs)\n", nr_cpus); - } - - printf("------------------------------------------------------------------------------\n\n"); - - if (nr_counters == 1) - printf(" events pcnt"); - else - printf(" weight events pcnt"); - - printf(" RIP kernel function\n" - " ______ ______ _____ ________________ _______________\n\n" - ); - - for (i = 0, printed = 0; i < sym_table_count; i++) { - float pcnt; - int count; - - if (printed <= 18 && tmp[i].count[0] >= count_filter) { - pcnt = 100.0 - (100.0*((sum_kevents-tmp[i].count[0])/sum_kevents)); - - if (nr_counters == 1) - printf("%19.2f - %4.1f%% - %016llx : %s\n", - sym_weight(tmp + i), - pcnt, tmp[i].addr, tmp[i].sym); - else - printf("%8.1f %10ld - %4.1f%% - %016llx : %s\n", - sym_weight(tmp + i), - tmp[i].count[0], - pcnt, tmp[i].addr, tmp[i].sym); - printed++; - } - /* - * Add decay to the counts: - */ - for (count = 0; count < nr_counters; count++) - sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; - } - - if (sym_filter_entry) - show_details(sym_filter_entry); - - { - struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; - - if (poll(&stdin_poll, 1, 0) == 1) { - printf("key pressed - exiting.\n"); - exit(0); - } - } -} - -static void *display_thread(void *arg) -{ - printf("KernelTop refresh period: %d seconds\n", delay_secs); - - while (!sleep(delay_secs)) - print_sym_table(); - - return NULL; -} - -static int read_symbol(FILE *in, struct sym_entry *s) -{ - static int filter_match = 0; - char *sym, stype; - char str[500]; - int rc, pos; - - rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); - if (rc == EOF) - return -1; - - assert(rc == 3); - - /* skip until end of line: */ - pos = strlen(str); - do { - rc = fgetc(in); - if (rc == '\n' || rc == EOF || pos >= 499) - break; - str[pos] = rc; - pos++; - } while (1); - str[pos] = 0; - - sym = str; - - /* Filter out known duplicates and non-text symbols. */ - if (!strcmp(sym, "_text")) - return 1; - if (!min_ip && !strcmp(sym, "_stext")) - return 1; - if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) - return 1; - if (stype != 'T' && stype != 't') - return 1; - if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) - return 1; - if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) - return 1; - - s->sym = malloc(strlen(str)); - assert(s->sym); - - strcpy((char *)s->sym, str); - s->skip = 0; - - /* Tag events to be skipped. */ - if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) - s->skip = 1; - else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) - s->skip = 1; - else if (!strcmp("mwait_idle", s->sym)) - s->skip = 1; - - if (filter_match == 1) { - filter_end = s->addr; - filter_match = -1; - if (filter_end - filter_start > 10000) { - printf("hm, too large filter symbol <%s> - skipping.\n", - sym_filter); - printf("symbol filter start: %016lx\n", filter_start); - printf(" end: %016lx\n", filter_end); - filter_end = filter_start = 0; - sym_filter = NULL; - sleep(1); - } - } - if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { - filter_match = 1; - filter_start = s->addr; - } - - return 0; -} - -int compare_addr(const void *__sym1, const void *__sym2) -{ - const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; - - return sym1->addr > sym2->addr; -} - -static void sort_symbol_table(void) -{ - int i, dups; - - do { - qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); - for (i = 0, dups = 0; i < sym_table_count; i++) { - if (sym_table[i].addr == sym_table[i+1].addr) { - sym_table[i+1].addr = -1ll; - dups++; - } - } - sym_table_count -= dups; - } while(dups); -} - -static void parse_symbols(void) -{ - struct sym_entry *last; - - FILE *kallsyms = fopen("/proc/kallsyms", "r"); - - if (!kallsyms) { - printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); - exit(-1); - } - - while (!feof(kallsyms)) { - if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { - sym_table_count++; - assert(sym_table_count <= MAX_SYMS); - } - } - - sort_symbol_table(); - min_ip = sym_table[0].addr; - max_ip = sym_table[sym_table_count-1].addr; - last = sym_table + sym_table_count++; - - last->addr = -1ll; - last->sym = ""; - - if (filter_end) { - int count; - for (count=0; count < sym_table_count; count ++) { - if (!strcmp(sym_table[count].sym, sym_filter)) { - sym_filter_entry = &sym_table[count]; - break; - } - } - } - if (dump_symtab) { - int i; - - for (i = 0; i < sym_table_count; i++) - fprintf(stderr, "%llx %s\n", - sym_table[i].addr, sym_table[i].sym); - } -} - -/* - * Source lines - */ - -static void parse_vmlinux(char *filename) -{ - FILE *file; - char command[PATH_MAX*2]; - if (!filename) - return; - - sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); - - file = popen(command, "r"); - if (!file) - return; - - lines_tail = &lines; - while (!feof(file)) { - struct source_line *src; - size_t dummy = 0; - char *c; - - src = malloc(sizeof(struct source_line)); - assert(src != NULL); - memset(src, 0, sizeof(struct source_line)); - - if (getline(&src->line, &dummy, file) < 0) - break; - if (!src->line) - break; - - c = strchr(src->line, '\n'); - if (c) - *c = 0; - - src->next = NULL; - *lines_tail = src; - lines_tail = &src->next; - - if (strlen(src->line)>8 && src->line[8] == ':') - src->EIP = strtoull(src->line, NULL, 16); - if (strlen(src->line)>8 && src->line[16] == ':') - src->EIP = strtoull(src->line, NULL, 16); - } - pclose(file); -} - -static void record_precise_ip(uint64_t ip) -{ - struct source_line *line; - - for (line = lines; line; line = line->next) { - if (line->EIP == ip) - line->count++; - if (line->EIP > ip) - break; - } -} - -static void lookup_sym_in_vmlinux(struct sym_entry *sym) -{ - struct source_line *line; - char pattern[PATH_MAX]; - sprintf(pattern, "<%s>:", sym->sym); - - for (line = lines; line; line = line->next) { - if (strstr(line->line, pattern)) { - sym->source = line; - break; - } - } -} - -static void show_lines(struct source_line *line_queue, int line_queue_count) -{ - int i; - struct source_line *line; - - line = line_queue; - for (i = 0; i < line_queue_count; i++) { - printf("%8li\t%s\n", line->count, line->line); - line = line->next; - } -} - -#define TRACE_COUNT 3 - -static void show_details(struct sym_entry *sym) -{ - struct source_line *line; - struct source_line *line_queue = NULL; - int displayed = 0; - int line_queue_count = 0; - - if (!sym->source) - lookup_sym_in_vmlinux(sym); - if (!sym->source) - return; - - printf("Showing details for %s\n", sym->sym); - - line = sym->source; - while (line) { - if (displayed && strstr(line->line, ">:")) - break; - - if (!line_queue_count) - line_queue = line; - line_queue_count ++; - - if (line->count >= count_filter) { - show_lines(line_queue, line_queue_count); - line_queue_count = 0; - line_queue = NULL; - } else if (line_queue_count > TRACE_COUNT) { - line_queue = line_queue->next; - line_queue_count --; - } - - line->count = 0; - displayed++; - if (displayed > 300) - break; - line = line->next; - } -} - -/* - * Binary search in the histogram table and record the hit: - */ -static void record_ip(uint64_t ip, int counter) -{ - int left_idx, middle_idx, right_idx, idx; - unsigned long left, middle, right; - - record_precise_ip(ip); - - left_idx = 0; - right_idx = sym_table_count-1; - assert(ip <= max_ip && ip >= min_ip); - - while (left_idx + 1 < right_idx) { - middle_idx = (left_idx + right_idx) / 2; - - left = sym_table[ left_idx].addr; - middle = sym_table[middle_idx].addr; - right = sym_table[ right_idx].addr; - - if (!(left <= middle && middle <= right)) { - printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); - printf("%d %d %d\n", left_idx, middle_idx, right_idx); - } - assert(left <= middle && middle <= right); - if (!(left <= ip && ip <= right)) { - printf(" left: %016lx\n", left); - printf(" ip: %016lx\n", (unsigned long)ip); - printf("right: %016lx\n", right); - } - assert(left <= ip && ip <= right); - /* - * [ left .... target .... middle .... right ] - * => right := middle - */ - if (ip < middle) { - right_idx = middle_idx; - continue; - } - /* - * [ left .... middle ... target ... right ] - * => left := middle - */ - left_idx = middle_idx; - } - - idx = left_idx; - - if (!sym_table[idx].skip) - sym_table[idx].count[counter]++; - else events--; -} - -static void process_event(uint64_t ip, int counter) -{ - events++; - - if (ip < min_ip || ip > max_ip) { - userspace_events++; - return; - } - - record_ip(ip, counter); -} - -static void process_options(int argc, char *argv[]) -{ - int error = 0, counter; - - if (strstr(argv[0], "perfstat")) - run_perfstat = 1; - - for (;;) { - int option_index = 0; - /** Options for getopt */ - static struct option long_options[] = { - {"count", required_argument, NULL, 'c'}, - {"cpu", required_argument, NULL, 'C'}, - {"delay", required_argument, NULL, 'd'}, - {"dump_symtab", no_argument, NULL, 'D'}, - {"event", required_argument, NULL, 'e'}, - {"filter", required_argument, NULL, 'f'}, - {"group", required_argument, NULL, 'g'}, - {"help", no_argument, NULL, 'h'}, - {"nmi", required_argument, NULL, 'n'}, - {"mmap_info", no_argument, NULL, 'M'}, - {"mmap_pages", required_argument, NULL, 'm'}, - {"munmap_info", no_argument, NULL, 'U'}, - {"pid", required_argument, NULL, 'p'}, - {"realtime", required_argument, NULL, 'r'}, - {"scale", no_argument, NULL, 'l'}, - {"symbol", required_argument, NULL, 's'}, - {"stat", no_argument, NULL, 'S'}, - {"vmlinux", required_argument, NULL, 'x'}, - {"zero", no_argument, NULL, 'z'}, - {NULL, 0, NULL, 0 } - }; - int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU", - long_options, &option_index); - if (c == -1) - break; - - switch (c) { - case 'a': system_wide = 1; break; - case 'c': default_interval = atoi(optarg); break; - case 'C': - /* CPU and PID are mutually exclusive */ - if (tid != -1) { - printf("WARNING: CPU switch overriding PID\n"); - sleep(1); - tid = -1; - } - profile_cpu = atoi(optarg); break; - case 'd': delay_secs = atoi(optarg); break; - case 'D': dump_symtab = 1; break; - - case 'e': error = parse_events(optarg); break; - - case 'f': count_filter = atoi(optarg); break; - case 'g': group = atoi(optarg); break; - case 'h': display_help(); break; - case 'l': scale = 1; break; - case 'n': nmi = atoi(optarg); break; - case 'p': - /* CPU and PID are mutually exclusive */ - if (profile_cpu != -1) { - printf("WARNING: PID switch overriding CPU\n"); - sleep(1); - profile_cpu = -1; - } - tid = atoi(optarg); break; - case 'r': realtime_prio = atoi(optarg); break; - case 's': sym_filter = strdup(optarg); break; - case 'S': run_perfstat = 1; break; - case 'x': vmlinux = strdup(optarg); break; - case 'z': zero = 1; break; - case 'm': mmap_pages = atoi(optarg); break; - case 'M': use_mmap = 1; break; - case 'U': use_munmap = 1; break; - default: error = 1; break; - } - } - if (error) - display_help(); - - if (!nr_counters) { - if (run_perfstat) - nr_counters = 8; - else { - nr_counters = 1; - event_id[0] = 0; - } - } - - for (counter = 0; counter < nr_counters; counter++) { - if (event_count[counter]) - continue; - - event_count[counter] = default_interval; - } -} - -struct mmap_data { - int counter; - void *base; - unsigned int mask; - unsigned int prev; -}; - -static unsigned int mmap_read_head(struct mmap_data *md) -{ - struct perf_counter_mmap_page *pc = md->base; - int head; - - head = pc->data_head; - rmb(); - - return head; -} - -struct timeval last_read, this_read; - -static void mmap_read(struct mmap_data *md) -{ - unsigned int head = mmap_read_head(md); - unsigned int old = md->prev; - unsigned char *data = md->base + page_size; - int diff; - - gettimeofday(&this_read, NULL); - - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and screw up the events under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. - */ - diff = head - old; - if (diff > md->mask / 2 || diff < 0) { - struct timeval iv; - unsigned long msecs; - - timersub(&this_read, &last_read, &iv); - msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - - fprintf(stderr, "WARNING: failed to keep up with mmap data." - " Last read %lu msecs ago.\n", msecs); - - /* - * head points to a known good entry, start there. - */ - old = head; - } - - last_read = this_read; - - for (; old != head;) { - struct ip_event { - struct perf_event_header header; - __u64 ip; - __u32 pid, tid; - }; - struct mmap_event { - struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; - char filename[PATH_MAX]; - }; - - typedef union event_union { - struct perf_event_header header; - struct ip_event ip; - struct mmap_event mmap; - } event_t; - - event_t *event = (event_t *)&data[old & md->mask]; - - event_t event_copy; - - unsigned int size = event->header.size; - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((old & md->mask) + size != ((old + size) & md->mask)) { - unsigned int offset = old; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = &event_copy; - - do { - cpy = min(md->mask + 1 - (offset & md->mask), len); - memcpy(dst, &data[offset & md->mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = &event_copy; - } - - old += size; - - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { - if (event->header.type & PERF_RECORD_IP) - process_event(event->ip.ip, md->counter); - } else { - switch (event->header.type) { - case PERF_EVENT_MMAP: - case PERF_EVENT_MUNMAP: - printf("%s: %Lu %Lu %Lu %s\n", - event->header.type == PERF_EVENT_MMAP - ? "mmap" : "munmap", - event->mmap.start, - event->mmap.len, - event->mmap.pgoff, - event->mmap.filename); - break; - } - } - } - - md->prev = old; -} - -int main(int argc, char *argv[]) -{ - struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; - struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; - struct perf_counter_hw_event hw_event; - pthread_t thread; - int i, counter, group_fd, nr_poll = 0; - unsigned int cpu; - int ret; - - page_size = sysconf(_SC_PAGE_SIZE); - - process_options(argc, argv); - - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - - if (run_perfstat) - return do_perfstat(argc, argv); - - if (tid != -1 || profile_cpu != -1) - nr_cpus = 1; - - parse_symbols(); - if (vmlinux && sym_filter_entry) - parse_vmlinux(vmlinux); - - for (i = 0; i < nr_cpus; i++) { - group_fd = -1; - for (counter = 0; counter < nr_counters; counter++) { - - cpu = profile_cpu; - if (tid == -1 && profile_cpu == -1) - cpu = i; - - memset(&hw_event, 0, sizeof(hw_event)); - hw_event.config = event_id[counter]; - hw_event.irq_period = event_count[counter]; - hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; - hw_event.nmi = nmi; - hw_event.mmap = use_mmap; - hw_event.munmap = use_munmap; - - fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); - if (fd[i][counter] < 0) { - int err = errno; - printf("kerneltop error: syscall returned with %d (%s)\n", - fd[i][counter], strerror(err)); - if (err == EPERM) - printf("Are you root?\n"); - exit(-1); - } - assert(fd[i][counter] >= 0); - fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = fd[i][counter]; - - event_array[nr_poll].fd = fd[i][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[i][counter].counter = counter; - mmap_array[i][counter].prev = 0; - mmap_array[i][counter].mask = mmap_pages*page_size - 1; - mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[i][counter], 0); - if (mmap_array[i][counter].base == MAP_FAILED) { - printf("kerneltop error: failed to mmap with %d (%s)\n", - errno, strerror(errno)); - exit(-1); - } - } - } - - if (pthread_create(&thread, NULL, display_thread, NULL)) { - printf("Could not create display thread.\n"); - exit(-1); - } - - if (realtime_prio) { - struct sched_param param; - - param.sched_priority = realtime_prio; - if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); - exit(-1); - } - } - - while (1) { - int hits = events; - - for (i = 0; i < nr_cpus; i++) { - for (counter = 0; counter < nr_counters; counter++) - mmap_read(&mmap_array[i][counter]); - } - - if (hits == events) - ret = poll(event_array, nr_poll, 100); - } - - return 0; -} diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c index 63f8a89..ff8658f 100644 --- a/Documentation/perf_counter/perf.c +++ b/Documentation/perf_counter/perf.c @@ -249,6 +249,7 @@ static void handle_internal_command(int argc, const char **argv) const char *cmd = argv[0]; static struct cmd_struct commands[] = { { "top", cmd_top, 0 }, + { "stat", cmd_stat, 0 }, }; int i; static const char ext[] = STRIP_EXTENSION; -- cgit v0.10.2 From 1d8c8b209e9351a7de1307d7b9b6df4222b8d742 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 15:52:29 +0200 Subject: perf_counter tools: add help texts Add Documentation/perf-stat.txt and Documentation/perf-top.txt. The template that was used for it: Documentation/git-add.txt from Git. Fix up small bugs to make these help texts show up both in the 'perf' common-command summary output screen, and on the individual help screens. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Documentation/perf-stat.txt b/Documentation/perf_counter/Documentation/perf-stat.txt new file mode 100644 index 0000000..7fcab27 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-stat.txt @@ -0,0 +1,76 @@ +perf-stat(1) +========== + +NAME +---- +perf-stat - Run a command and gather performance counter statistics + +SYNOPSIS +-------- +[verse] +'perf stat' [-e | --event=EVENT] [-l] [-a] + +DESCRIPTION +----------- +This command runs a command and gathers performance counter statistics +from it. + + +OPTIONS +------- +...:: + Any command you can specify in a shell. + +-e:: +--event=:: + 0:0: cpu-cycles + 0:0: cycles + 0:1: instructions + 0:2: cache-references + 0:3: cache-misses + 0:4: branch-instructions + 0:4: branches + 0:5: branch-misses + 0:6: bus-cycles + 1:0: cpu-clock + 1:1: task-clock + 1:2: page-faults + 1:2: faults + 1:5: minor-faults + 1:6: major-faults + 1:3: context-switches + 1:3: cs + 1:4: cpu-migrations + 1:4: migrations + rNNN: raw PMU events (eventsel+umask) + +-a:: + system-wide collection + +-l:: + scale counter values + +Configuration +------------- + +EXAMPLES +-------- + +$ perf stat sleep 1 + + Performance counter stats for 'sleep': + + 0.678356 task clock ticks (msecs) + 7 context switches (events) + 4 CPU migrations (events) + 232 pagefaults (events) + 1810403 CPU cycles (events) + 946759 instructions (events) + 18952 cache references (events) + 4885 cache misses (events) + + Wall-clock time elapsed: 1001.252894 msecs + +SEE ALSO +-------- +linkperf:git-tops[1] diff --git a/Documentation/perf_counter/Documentation/perf-top.txt b/Documentation/perf_counter/Documentation/perf-top.txt new file mode 100644 index 0000000..057333b --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-top.txt @@ -0,0 +1,61 @@ +perf-top(1) +========== + +NAME +---- +perf-top - Run a command and profile it + +SYNOPSIS +-------- +[verse] +'perf top' [-e | --event=EVENT] [-l] [-a] + +DESCRIPTION +----------- +This command runs a command and gathers a performance counter profile +from it. + + +OPTIONS +------- +...:: + Any command you can specify in a shell. + +-e:: +--event=:: + 0:0: cpu-cycles + 0:0: cycles + 0:1: instructions + 0:2: cache-references + 0:3: cache-misses + 0:4: branch-instructions + 0:4: branches + 0:5: branch-misses + 0:6: bus-cycles + 1:0: cpu-clock + 1:1: task-clock + 1:2: page-faults + 1:2: faults + 1:5: minor-faults + 1:6: major-faults + 1:3: context-switches + 1:3: cs + 1:4: cpu-migrations + 1:4: migrations + rNNN: raw PMU events (eventsel+umask) + +-a:: + system-wide collection + +-l:: + scale counter values + +Configuration +------------- + +EXAMPLES +-------- + +SEE ALSO +-------- +linkperf:git-stat[1] diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h index a3bb6cd..605323c 100644 --- a/Documentation/perf_counter/builtin.h +++ b/Documentation/perf_counter/builtin.h @@ -14,6 +14,6 @@ extern void prune_packed_objects(int); extern int read_line_with_nul(char *buf, int size, FILE *file); extern int check_pager_config(const char *cmd); -extern int cmd_top(int argc, char **argv, const char *prefix); -extern int cmd_stat(int argc, char **argv, const char *prefix); +extern int cmd_top(int argc, const char **argv, const char *prefix); +extern int cmd_stat(int argc, const char **argv, const char *prefix); #endif diff --git a/Documentation/perf_counter/generate-cmdlist.sh b/Documentation/perf_counter/generate-cmdlist.sh index 75c68d9..f06f6fd 100755 --- a/Documentation/perf_counter/generate-cmdlist.sh +++ b/Documentation/perf_counter/generate-cmdlist.sh @@ -9,16 +9,16 @@ struct cmdname_help static struct cmdname_help common_cmds[] = {" -sed -n -e 's/^git-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt | +sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt | sort | while read cmd do sed -n ' - /^NAME/,/git-'"$cmd"'/H + /^NAME/,/perf-'"$cmd"'/H ${ x - s/.*git-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ p - }' "Documentation/git-$cmd.txt" + }' "Documentation/perf-$cmd.txt" done echo "};" -- cgit v0.10.2 From e33e0a43736307512422e41aee6e24d5a8c39181 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 15:58:01 +0200 Subject: perf_counter tools: add 'perf record' command Move perf-record.c into the perf suite of commands. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Documentation/perf-record.txt b/Documentation/perf_counter/Documentation/perf-record.txt new file mode 100644 index 0000000..d07700e --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-record.txt @@ -0,0 +1,63 @@ +perf-record(1) +========== + +NAME +---- +perf-record - Run a command and record its profile into output.perf + +SYNOPSIS +-------- +[verse] +'perf record' [-e | --event=EVENT] [-l] [-a] + +DESCRIPTION +----------- +This command runs a command and gathers a performance counter profile +from it, into output.perf - without displaying anything. + +This file can then be inspected later on, using 'perf report'. + + +OPTIONS +------- +...:: + Any command you can specify in a shell. + +-e:: +--event=:: + 0:0: cpu-cycles + 0:0: cycles + 0:1: instructions + 0:2: cache-references + 0:3: cache-misses + 0:4: branch-instructions + 0:4: branches + 0:5: branch-misses + 0:6: bus-cycles + 1:0: cpu-clock + 1:1: task-clock + 1:2: page-faults + 1:2: faults + 1:5: minor-faults + 1:6: major-faults + 1:3: context-switches + 1:3: cs + 1:4: cpu-migrations + 1:4: migrations + rNNN: raw PMU events (eventsel+umask) + +-a:: + system-wide collection + +-l:: + scale counter values + +Configuration +------------- + +EXAMPLES +-------- + +SEE ALSO +-------- +linkperf:git-stat[1] diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index fb8b717..b6c665e 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -309,6 +309,7 @@ LIB_OBJS += usage.o LIB_OBJS += wrapper.o BUILTIN_OBJS += builtin-help.o +BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o diff --git a/Documentation/perf_counter/builtin-record.c b/Documentation/perf_counter/builtin-record.c new file mode 100644 index 0000000..4a50abf --- /dev/null +++ b/Documentation/perf_counter/builtin-record.c @@ -0,0 +1,506 @@ + + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "../../include/linux/perf_counter.h" + + +/* + * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all + * counters in the current task. + */ +#define PR_TASK_PERF_COUNTERS_DISABLE 31 +#define PR_TASK_PERF_COUNTERS_ENABLE 32 + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#define rdclock() \ +({ \ + struct timespec ts; \ + \ + clock_gettime(CLOCK_MONOTONIC, &ts); \ + ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ +}) + +/* + * Pick up some kernel type conventions: + */ +#define __user +#define asmlinkage + +#ifdef __x86_64__ +#define __NR_perf_counter_open 295 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __i386__ +#define __NR_perf_counter_open 333 +#define rmb() asm volatile("lfence" ::: "memory") +#define cpu_relax() asm volatile("rep; nop" ::: "memory"); +#endif + +#ifdef __powerpc__ +#define __NR_perf_counter_open 319 +#define rmb() asm volatile ("sync" ::: "memory") +#define cpu_relax() asm volatile ("" ::: "memory"); +#endif + +#define unlikely(x) __builtin_expect(!!(x), 0) +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +extern asmlinkage int sys_perf_counter_open( + struct perf_counter_hw_event *hw_event_uptr __user, + pid_t pid, + int cpu, + int group_fd, + unsigned long flags); + +#define MAX_COUNTERS 64 +#define MAX_NR_CPUS 256 + +#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) + +static int nr_counters = 0; +static __u64 event_id[MAX_COUNTERS] = { }; +static int default_interval = 100000; +static int event_count[MAX_COUNTERS]; +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; +static int nr_cpus = 0; +static unsigned int page_size; +static unsigned int mmap_pages = 16; +static int output; +static char *output_name = "output.perf"; +static int group = 0; +static unsigned int realtime_prio = 0; + +const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +struct event_symbol { + __u64 event; + char *symbol; +}; + +static struct event_symbol event_symbols[] = { + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, + {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, + + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, + {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, +}; + +/* + * Each event can have multiple symbolic names. + * Symbolic names are (almost) exactly matched. + */ +static __u64 match_event_symbols(char *str) +{ + __u64 config, id; + int type; + unsigned int i; + + if (sscanf(str, "r%llx", &config) == 1) + return config | PERF_COUNTER_RAW_MASK; + + if (sscanf(str, "%d:%llu", &type, &id) == 2) + return EID(type, id); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + if (!strncmp(str, event_symbols[i].symbol, + strlen(event_symbols[i].symbol))) + return event_symbols[i].event; + } + + return ~0ULL; +} + +static int parse_events(char *str) +{ + __u64 config; + +again: + if (nr_counters == MAX_COUNTERS) + return -1; + + config = match_event_symbols(str); + if (config == ~0ULL) + return -1; + + event_id[nr_counters] = config; + nr_counters++; + + str = strstr(str, ","); + if (str) { + str++; + goto again; + } + + return 0; +} + +#define __PERF_COUNTER_FIELD(config, name) \ + ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) + +#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) +#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) +#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) +#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) + +static void display_events_help(void) +{ + unsigned int i; + __u64 e; + + printf( + " -e EVENT --event=EVENT # symbolic-name abbreviations"); + + for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { + int type, id; + + e = event_symbols[i].event; + type = PERF_COUNTER_TYPE(e); + id = PERF_COUNTER_ID(e); + + printf("\n %d:%d: %-20s", + type, id, event_symbols[i].symbol); + } + + printf("\n" + " rNNN: raw PMU events (eventsel+umask)\n\n"); +} + +static void display_help(void) +{ + printf( + "Usage: perf-record []\n" + "perf-record Options (up to %d event types can be specified at once):\n\n", + MAX_COUNTERS); + + display_events_help(); + + printf( + " -c CNT --count=CNT # event period to sample\n" + " -m pages --mmap_pages= # number of mmap data pages\n" + " -o file --output= # output file\n" + " -r prio --realtime= # use RT prio\n" + ); + + exit(0); +} + +static void process_options(int argc, char *argv[]) +{ + int error = 0, counter; + + for (;;) { + int option_index = 0; + /** Options for getopt */ + static struct option long_options[] = { + {"count", required_argument, NULL, 'c'}, + {"event", required_argument, NULL, 'e'}, + {"mmap_pages", required_argument, NULL, 'm'}, + {"output", required_argument, NULL, 'o'}, + {"realtime", required_argument, NULL, 'r'}, + {NULL, 0, NULL, 0 } + }; + int c = getopt_long(argc, argv, "+:c:e:m:o:r:", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 'c': default_interval = atoi(optarg); break; + case 'e': error = parse_events(optarg); break; + case 'm': mmap_pages = atoi(optarg); break; + case 'o': output_name = strdup(optarg); break; + case 'r': realtime_prio = atoi(optarg); break; + default: error = 1; break; + } + } + if (error) + display_help(); + + if (!nr_counters) { + nr_counters = 1; + event_id[0] = 0; + } + + for (counter = 0; counter < nr_counters; counter++) { + if (event_count[counter]) + continue; + + event_count[counter] = default_interval; + } +} + +struct mmap_data { + int counter; + void *base; + unsigned int mask; + unsigned int prev; +}; + +static unsigned int mmap_read_head(struct mmap_data *md) +{ + struct perf_counter_mmap_page *pc = md->base; + int head; + + head = pc->data_head; + rmb(); + + return head; +} + +static long events; +static struct timeval last_read, this_read; + +static void mmap_read(struct mmap_data *md) +{ + unsigned int head = mmap_read_head(md); + unsigned int old = md->prev; + unsigned char *data = md->base + page_size; + unsigned long size; + void *buf; + int diff; + + gettimeofday(&this_read, NULL); + + /* + * If we're further behind than half the buffer, there's a chance + * the writer will bite our tail and screw up the events under us. + * + * If we somehow ended up ahead of the head, we got messed up. + * + * In either case, truncate and restart at head. + */ + diff = head - old; + if (diff > md->mask / 2 || diff < 0) { + struct timeval iv; + unsigned long msecs; + + timersub(&this_read, &last_read, &iv); + msecs = iv.tv_sec*1000 + iv.tv_usec/1000; + + fprintf(stderr, "WARNING: failed to keep up with mmap data." + " Last read %lu msecs ago.\n", msecs); + + /* + * head points to a known good entry, start there. + */ + old = head; + } + + last_read = this_read; + + if (old != head) + events++; + + size = head - old; + + if ((old & md->mask) + size != (head & md->mask)) { + buf = &data[old & md->mask]; + size = md->mask + 1 - (old & md->mask); + old += size; + while (size) { + int ret = write(output, buf, size); + if (ret < 0) { + perror("failed to write"); + exit(-1); + } + size -= ret; + buf += ret; + } + } + + buf = &data[old & md->mask]; + size = head - old; + old += size; + while (size) { + int ret = write(output, buf, size); + if (ret < 0) { + perror("failed to write"); + exit(-1); + } + size -= ret; + buf += ret; + } + + md->prev = old; +} + +static volatile int done = 0; + +static void sigchld_handler(int sig) +{ + if (sig == SIGCHLD) + done = 1; +} + +int cmd_record(int argc, const char **argv) +{ + struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; + struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; + struct perf_counter_hw_event hw_event; + int i, counter, group_fd, nr_poll = 0; + pid_t pid; + int ret; + + page_size = sysconf(_SC_PAGE_SIZE); + + process_options(argc, argv); + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + output = open(output_name, O_CREAT|O_RDWR, S_IRWXU); + if (output < 0) { + perror("failed to create output file"); + exit(-1); + } + + argc -= optind; + argv += optind; + + for (i = 0; i < nr_cpus; i++) { + group_fd = -1; + for (counter = 0; counter < nr_counters; counter++) { + + memset(&hw_event, 0, sizeof(hw_event)); + hw_event.config = event_id[counter]; + hw_event.irq_period = event_count[counter]; + hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; + hw_event.nmi = 1; + hw_event.mmap = 1; + hw_event.comm = 1; + + fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0); + if (fd[i][counter] < 0) { + int err = errno; + printf("kerneltop error: syscall returned with %d (%s)\n", + fd[i][counter], strerror(err)); + if (err == EPERM) + printf("Are you root?\n"); + exit(-1); + } + assert(fd[i][counter] >= 0); + fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); + + /* + * First counter acts as the group leader: + */ + if (group && group_fd == -1) + group_fd = fd[i][counter]; + + event_array[nr_poll].fd = fd[i][counter]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[i][counter].counter = counter; + mmap_array[i][counter].prev = 0; + mmap_array[i][counter].mask = mmap_pages*page_size - 1; + mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ, MAP_SHARED, fd[i][counter], 0); + if (mmap_array[i][counter].base == MAP_FAILED) { + printf("kerneltop error: failed to mmap with %d (%s)\n", + errno, strerror(errno)); + exit(-1); + } + } + } + + signal(SIGCHLD, sigchld_handler); + + pid = fork(); + if (pid < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], argv)) { + perror(argv[0]); + exit(-1); + } + } + + if (realtime_prio) { + struct sched_param param; + + param.sched_priority = realtime_prio; + if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { + printf("Could not set realtime priority.\n"); + exit(-1); + } + } + + /* + * TODO: store the current /proc/$/maps information somewhere + */ + + while (!done) { + int hits = events; + + for (i = 0; i < nr_cpus; i++) { + for (counter = 0; counter < nr_counters; counter++) + mmap_read(&mmap_array[i][counter]); + } + + if (hits == events) + ret = poll(event_array, nr_poll, 100); + } + + return 0; +} diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h index 605323c..5854b17 100644 --- a/Documentation/perf_counter/builtin.h +++ b/Documentation/perf_counter/builtin.h @@ -14,6 +14,7 @@ extern void prune_packed_objects(int); extern int read_line_with_nul(char *buf, int size, FILE *file); extern int check_pager_config(const char *cmd); -extern int cmd_top(int argc, const char **argv, const char *prefix); +extern int cmd_record(int argc, const char **argv, const char *prefix); extern int cmd_stat(int argc, const char **argv, const char *prefix); +extern int cmd_top(int argc, const char **argv, const char *prefix); #endif diff --git a/Documentation/perf_counter/command-list.txt b/Documentation/perf_counter/command-list.txt index 52455d4..d15210a 100644 --- a/Documentation/perf_counter/command-list.txt +++ b/Documentation/perf_counter/command-list.txt @@ -1,5 +1,6 @@ # List of known perf commands. # command name category [deprecated] [common] -perf-top mainporcelain common +perf-record mainporcelain common perf-stat mainporcelain common +perf-top mainporcelain common diff --git a/Documentation/perf_counter/perf-record.c b/Documentation/perf_counter/perf-record.c deleted file mode 100644 index 614de7c..0000000 --- a/Documentation/perf_counter/perf-record.c +++ /dev/null @@ -1,530 +0,0 @@ - - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "../../include/linux/perf_counter.h" - - -/* - * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all - * counters in the current task. - */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#define rdclock() \ -({ \ - struct timespec ts; \ - \ - clock_gettime(CLOCK_MONOTONIC, &ts); \ - ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ -}) - -/* - * Pick up some kernel type conventions: - */ -#define __user -#define asmlinkage - -#ifdef __x86_64__ -#define __NR_perf_counter_open 295 -#define rmb() asm volatile("lfence" ::: "memory") -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); -#endif - -#ifdef __i386__ -#define __NR_perf_counter_open 333 -#define rmb() asm volatile("lfence" ::: "memory") -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); -#endif - -#ifdef __powerpc__ -#define __NR_perf_counter_open 319 -#define rmb() asm volatile ("sync" ::: "memory") -#define cpu_relax() asm volatile ("" ::: "memory"); -#endif - -#define unlikely(x) __builtin_expect(!!(x), 0) -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) - -asmlinkage int sys_perf_counter_open( - struct perf_counter_hw_event *hw_event_uptr __user, - pid_t pid, - int cpu, - int group_fd, - unsigned long flags) -{ - return syscall( - __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); -} - -#define MAX_COUNTERS 64 -#define MAX_NR_CPUS 256 - -#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) - -static int nr_counters = 0; -static __u64 event_id[MAX_COUNTERS] = { }; -static int default_interval = 100000; -static int event_count[MAX_COUNTERS]; -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static int nr_cpus = 0; -static unsigned int page_size; -static unsigned int mmap_pages = 16; -static int output; -static char *output_name = "output.perf"; -static int group = 0; -static unsigned int realtime_prio = 0; - -const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - -static char *hw_event_names[] = { - "CPU cycles", - "instructions", - "cache references", - "cache misses", - "branches", - "branch misses", - "bus cycles", -}; - -static char *sw_event_names[] = { - "cpu clock ticks", - "task clock ticks", - "pagefaults", - "context switches", - "CPU migrations", - "minor faults", - "major faults", -}; - -struct event_symbol { - __u64 event; - char *symbol; -}; - -static struct event_symbol event_symbols[] = { - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, - {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, - - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, - {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, -}; - -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. - */ -static __u64 match_event_symbols(char *str) -{ - __u64 config, id; - int type; - unsigned int i; - - if (sscanf(str, "r%llx", &config) == 1) - return config | PERF_COUNTER_RAW_MASK; - - if (sscanf(str, "%d:%llu", &type, &id) == 2) - return EID(type, id); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) - return event_symbols[i].event; - } - - return ~0ULL; -} - -static int parse_events(char *str) -{ - __u64 config; - -again: - if (nr_counters == MAX_COUNTERS) - return -1; - - config = match_event_symbols(str); - if (config == ~0ULL) - return -1; - - event_id[nr_counters] = config; - nr_counters++; - - str = strstr(str, ","); - if (str) { - str++; - goto again; - } - - return 0; -} - -#define __PERF_COUNTER_FIELD(config, name) \ - ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) - -#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) -#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) -#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) -#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) - -static void display_events_help(void) -{ - unsigned int i; - __u64 e; - - printf( - " -e EVENT --event=EVENT # symbolic-name abbreviations"); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - int type, id; - - e = event_symbols[i].event; - type = PERF_COUNTER_TYPE(e); - id = PERF_COUNTER_ID(e); - - printf("\n %d:%d: %-20s", - type, id, event_symbols[i].symbol); - } - - printf("\n" - " rNNN: raw PMU events (eventsel+umask)\n\n"); -} - -static void display_help(void) -{ - printf( - "Usage: perf-record []\n" - "perf-record Options (up to %d event types can be specified at once):\n\n", - MAX_COUNTERS); - - display_events_help(); - - printf( - " -c CNT --count=CNT # event period to sample\n" - " -m pages --mmap_pages= # number of mmap data pages\n" - " -o file --output= # output file\n" - " -r prio --realtime= # use RT prio\n" - ); - - exit(0); -} - -static void process_options(int argc, char *argv[]) -{ - int error = 0, counter; - - for (;;) { - int option_index = 0; - /** Options for getopt */ - static struct option long_options[] = { - {"count", required_argument, NULL, 'c'}, - {"event", required_argument, NULL, 'e'}, - {"mmap_pages", required_argument, NULL, 'm'}, - {"output", required_argument, NULL, 'o'}, - {"realtime", required_argument, NULL, 'r'}, - {NULL, 0, NULL, 0 } - }; - int c = getopt_long(argc, argv, "+:c:e:m:o:r:", - long_options, &option_index); - if (c == -1) - break; - - switch (c) { - case 'c': default_interval = atoi(optarg); break; - case 'e': error = parse_events(optarg); break; - case 'm': mmap_pages = atoi(optarg); break; - case 'o': output_name = strdup(optarg); break; - case 'r': realtime_prio = atoi(optarg); break; - default: error = 1; break; - } - } - if (error) - display_help(); - - if (!nr_counters) { - nr_counters = 1; - event_id[0] = 0; - } - - for (counter = 0; counter < nr_counters; counter++) { - if (event_count[counter]) - continue; - - event_count[counter] = default_interval; - } -} - -struct mmap_data { - int counter; - void *base; - unsigned int mask; - unsigned int prev; -}; - -static unsigned int mmap_read_head(struct mmap_data *md) -{ - struct perf_counter_mmap_page *pc = md->base; - int head; - - head = pc->data_head; - rmb(); - - return head; -} - -static long events; -static struct timeval last_read, this_read; - -static void mmap_read(struct mmap_data *md) -{ - unsigned int head = mmap_read_head(md); - unsigned int old = md->prev; - unsigned char *data = md->base + page_size; - unsigned long size; - void *buf; - int diff; - - gettimeofday(&this_read, NULL); - - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and screw up the events under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. - */ - diff = head - old; - if (diff > md->mask / 2 || diff < 0) { - struct timeval iv; - unsigned long msecs; - - timersub(&this_read, &last_read, &iv); - msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - - fprintf(stderr, "WARNING: failed to keep up with mmap data." - " Last read %lu msecs ago.\n", msecs); - - /* - * head points to a known good entry, start there. - */ - old = head; - } - - last_read = this_read; - - if (old != head) - events++; - - size = head - old; - - if ((old & md->mask) + size != (head & md->mask)) { - buf = &data[old & md->mask]; - size = md->mask + 1 - (old & md->mask); - old += size; - while (size) { - int ret = write(output, buf, size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } - size -= ret; - buf += ret; - } - } - - buf = &data[old & md->mask]; - size = head - old; - old += size; - while (size) { - int ret = write(output, buf, size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } - size -= ret; - buf += ret; - } - - md->prev = old; -} - -static volatile int done = 0; - -static void sigchld_handler(int sig) -{ - if (sig == SIGCHLD) - done = 1; -} - -int main(int argc, char *argv[]) -{ - struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; - struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; - struct perf_counter_hw_event hw_event; - int i, counter, group_fd, nr_poll = 0; - pid_t pid; - int ret; - - page_size = sysconf(_SC_PAGE_SIZE); - - process_options(argc, argv); - - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - - output = open(output_name, O_CREAT|O_RDWR, S_IRWXU); - if (output < 0) { - perror("failed to create output file"); - exit(-1); - } - - argc -= optind; - argv += optind; - - for (i = 0; i < nr_cpus; i++) { - group_fd = -1; - for (counter = 0; counter < nr_counters; counter++) { - - memset(&hw_event, 0, sizeof(hw_event)); - hw_event.config = event_id[counter]; - hw_event.irq_period = event_count[counter]; - hw_event.record_type = PERF_RECORD_IP | PERF_RECORD_TID; - hw_event.nmi = 1; - hw_event.mmap = 1; - hw_event.comm = 1; - - fd[i][counter] = sys_perf_counter_open(&hw_event, -1, i, group_fd, 0); - if (fd[i][counter] < 0) { - int err = errno; - printf("kerneltop error: syscall returned with %d (%s)\n", - fd[i][counter], strerror(err)); - if (err == EPERM) - printf("Are you root?\n"); - exit(-1); - } - assert(fd[i][counter] >= 0); - fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = fd[i][counter]; - - event_array[nr_poll].fd = fd[i][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[i][counter].counter = counter; - mmap_array[i][counter].prev = 0; - mmap_array[i][counter].mask = mmap_pages*page_size - 1; - mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[i][counter], 0); - if (mmap_array[i][counter].base == MAP_FAILED) { - printf("kerneltop error: failed to mmap with %d (%s)\n", - errno, strerror(errno)); - exit(-1); - } - } - } - - signal(SIGCHLD, sigchld_handler); - - pid = fork(); - if (pid < 0) - perror("failed to fork"); - - if (!pid) { - if (execvp(argv[0], argv)) { - perror(argv[0]); - exit(-1); - } - } - - if (realtime_prio) { - struct sched_param param; - - param.sched_priority = realtime_prio; - if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); - exit(-1); - } - } - - /* - * TODO: store the current /proc/$/maps information somewhere - */ - - while (!done) { - int hits = events; - - for (i = 0; i < nr_cpus; i++) { - for (counter = 0; counter < nr_counters; counter++) - mmap_read(&mmap_array[i][counter]); - } - - if (hits == events) - ret = poll(event_array, nr_poll, 100); - } - - return 0; -} diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c index ff8658f..e849dd6 100644 --- a/Documentation/perf_counter/perf.c +++ b/Documentation/perf_counter/perf.c @@ -248,8 +248,9 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; static struct cmd_struct commands[] = { - { "top", cmd_top, 0 }, + { "record", cmd_record, 0 }, { "stat", cmd_stat, 0 }, + { "top", cmd_top, 0 }, }; int i; static const char ext[] = STRIP_EXTENSION; -- cgit v0.10.2 From cc13a5913797365b683212f5fca5fb04bb3582eb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 16:01:30 +0200 Subject: perf_counter tools: fix --version Hook up the 'perf version' built-in command. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h index 5854b17..aec5ae3 100644 --- a/Documentation/perf_counter/builtin.h +++ b/Documentation/perf_counter/builtin.h @@ -17,4 +17,5 @@ extern int check_pager_config(const char *cmd); extern int cmd_record(int argc, const char **argv, const char *prefix); extern int cmd_stat(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); +extern int cmd_version(int argc, const char **argv, const char *prefix); #endif diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c index e849dd6..20d508c 100644 --- a/Documentation/perf_counter/perf.c +++ b/Documentation/perf_counter/perf.c @@ -5,7 +5,7 @@ #include "run-command.h" const char perf_usage_string[] = - "perf [--version] [--exec-path[=PERF_EXEC_PATH]] [--html-path] [-p|--paginate|--no-pager] [--bare] [--perf-dir=PERF_DIR] [--work-tree=PERF_WORK_TREE] [--help] COMMAND [ARGS]"; + "perf [--version] [--help] COMMAND [ARGS]"; const char perf_more_info_string[] = "See 'perf help COMMAND' for more information on a specific command."; @@ -251,6 +251,7 @@ static void handle_internal_command(int argc, const char **argv) { "record", cmd_record, 0 }, { "stat", cmd_stat, 0 }, { "top", cmd_top, 0 }, + { "version", cmd_version, 0 }, }; int i; static const char ext[] = STRIP_EXTENSION; -- cgit v0.10.2 From 6142fdd968c76a0f2ee753c39bd5be8d1bb4ef04 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 16:05:55 +0200 Subject: perf_counter tools: add 'perf help' Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Documentation/perf-help.txt b/Documentation/perf_counter/Documentation/perf-help.txt new file mode 100644 index 0000000..f85fed5 --- /dev/null +++ b/Documentation/perf_counter/Documentation/perf-help.txt @@ -0,0 +1,38 @@ +perf-help(1) +=========== + +NAME +---- +perf-help - display help information about perf + +SYNOPSIS +-------- +'perf help' [-a|--all] [COMMAND] + +DESCRIPTION +----------- + +With no options and no COMMAND given, the synopsis of the 'perf' +command and a list of the most commonly used perf commands are printed +on the standard output. + +If the option '--all' or '-a' is given, then all available commands are +printed on the standard output. + +If a perf command is named, a manual page for that command is brought +up. The 'man' program is used by default for this purpose, but this +can be overridden by other options or configuration variables. + +Note that `perf --help ...` is identical to `perf help ...` because the +former is internally converted into the latter. + +OPTIONS +------- +-a:: +--all:: + Prints all the available commands on the standard output. This + option supersedes any other option. + +PERF +---- +Part of the linkperf:perf[1] suite diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h index aec5ae3..800f86c 100644 --- a/Documentation/perf_counter/builtin.h +++ b/Documentation/perf_counter/builtin.h @@ -14,6 +14,7 @@ extern void prune_packed_objects(int); extern int read_line_with_nul(char *buf, int size, FILE *file); extern int check_pager_config(const char *cmd); +extern int cmd_help(int argc, const char **argv, const char *prefix); extern int cmd_record(int argc, const char **argv, const char *prefix); extern int cmd_stat(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c index 20d508c..8d6faec 100644 --- a/Documentation/perf_counter/perf.c +++ b/Documentation/perf_counter/perf.c @@ -248,6 +248,7 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; static struct cmd_struct commands[] = { + { "help", cmd_help, 0 }, { "record", cmd_record, 0 }, { "stat", cmd_stat, 0 }, { "top", cmd_top, 0 }, -- cgit v0.10.2 From 125e702b09a28a502e145fb434678ee27720fc48 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 16:13:46 +0200 Subject: perf_counter tools: fix 'make install' Remove Git leftovers from this area. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index b6c665e..690045e 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -255,9 +255,9 @@ EXTRA_PROGRAMS = PROGRAMS += $(EXTRA_PROGRAMS) # -# None right now: +# Single 'perf' binary right now: # -# PROGRAMS += perf-fast-import$X +PROGRAMS += perf # List built-in command $C whose implementation cmd_$C() is not in # builtin-$C.o but is linked in as part of some other command. @@ -762,24 +762,9 @@ install: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(ALL_PROGRAMS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' - $(INSTALL) perf$X perf-upload-pack$X perf-receive-pack$X perf-upload-archive$X perf-shell$X perf-cvsserver '$(DESTDIR_SQ)$(bindir_SQ)' - $(MAKE) -C templates DESTDIR='$(DESTDIR_SQ)' install - $(MAKE) -C perl prefix='$(prefix_SQ)' DESTDIR='$(DESTDIR_SQ)' install ifneq (,$X) $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) endif - bindir=$$(cd '$(DESTDIR_SQ)$(bindir_SQ)' && pwd) && \ - execdir=$$(cd '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' && pwd) && \ - { $(RM) "$$execdir/perf-add$X" && \ - ln "$$bindir/perf$X" "$$execdir/perf-add$X" 2>/dev/null || \ - cp "$$bindir/perf$X" "$$execdir/perf-add$X"; } && \ - { for p in $(filter-out perf-add$X,$(BUILT_INS)); do \ - $(RM) "$$execdir/$$p" && \ - ln "$$execdir/perf-add$X" "$$execdir/$$p" 2>/dev/null || \ - ln -s "perf-add$X" "$$execdir/$$p" 2>/dev/null || \ - cp "$$execdir/perf-add$X" "$$execdir/$$p" || exit; \ - done } && \ - ./check_bindir "z$$bindir" "z$$execdir" "$$bindir/perf-add$X" ### Maintainer's dist rules -- cgit v0.10.2 From a7abe97fd8e7a6ccabba5a04a9f17be9211d418c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 10:59:34 -0400 Subject: tracing: rename EVENT_TRACER config to ENABLE_EVENT_TRACING Currently we have two configs: EVENT_TRACING and EVENT_TRACER. All tracers enable EVENT_TRACING. The EVENT_TRACER is only a convenience to enable the EVENT_TRACING when no other tracers are enabled. The names EVENT_TRACER and EVENT_TRACING are too similar and confusing. This patch renames EVENT_TRACER to ENABLE_EVENT_TRACING to be more appropriate to what it actually does, as well as add a comment in the help menu to explain the option's purpose. [ Impact: rename config option to reduce confusion ] Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3fa36d2..450d3c2 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -183,7 +183,7 @@ config CONTEXT_SWITCH_TRACER This tracer gets called from the context switch and records all switching of tasks. -config EVENT_TRACER +config ENABLE_EVENT_TRACING bool "Trace various events in the kernel" select TRACING help @@ -191,6 +191,10 @@ config EVENT_TRACER allowing the user to pick and choose which trace point they want to trace. + Note, all tracers enable event tracing. This option is + only a convenience to enable event tracing when no other + tracers are selected. + config FTRACE_SYSCALLS bool "Trace syscalls" depends on HAVE_FTRACE_SYSCALLS -- cgit v0.10.2 From 28d20e2d6e94434827e11c310788b87204b84559 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 12:12:44 -0400 Subject: tracing/events: call the correct event trace selftest init function The late_initcall calls a helper function instead of the proper init event selftest function. This update may have been lost due to conflicting merges. [ Impact: fix compiler warning and call extended event trace self tests ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 64f9d6d..98daf5d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1199,6 +1199,6 @@ static __init int event_trace_self_tests_init(void) return 0; } -late_initcall(event_trace_self_tests); +late_initcall(event_trace_self_tests_init); #endif -- cgit v0.10.2 From 23de29de2d8b227943be191d59fb6d983996d55e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 12:59:29 -0400 Subject: tracing: remove dangling semicolon Due to a cut and paste error, the trace_seq_putc had a semicolon after the prototype but before the stub function when tracing is disabled. [Impact: fix compile error ] Signed-off-by: Steven Rostedt diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 37db9bd..ba9627f 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -63,7 +63,7 @@ static inline int trace_seq_puts(struct trace_seq *s, const char *str) { return 0; } -static inline int trace_seq_putc(struct trace_seq *s, unsigned char c); +static inline int trace_seq_putc(struct trace_seq *s, unsigned char c) { return 0; } -- cgit v0.10.2 From cd0f2d4736ae8efabc60e54ecc8f677d0eddce02 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Apr 2009 16:56:59 +0100 Subject: ASoC: Factor out generic widget power checks This will form a basis for further power check refactoring: the overall goal of these changes is to allow us to check power separately to applying it, allowing improvements in the power sequencing algorithms. Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index a6d7337..28e6e32 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -581,6 +581,19 @@ static int dapm_generic_apply_power(struct snd_soc_dapm_widget *w) return 0; } +/* Generic check to see if a widget should be powered. + */ +static int dapm_generic_check_power(struct snd_soc_dapm_widget *w) +{ + int in, out; + + in = is_connected_input_ep(w); + dapm_clear_walk(w->codec); + out = is_connected_output_ep(w); + dapm_clear_walk(w->codec); + return out != 0 && in != 0; +} + /* * Scan a single DAPM widget for a complete audio path and update the * power status appropriately. @@ -653,11 +666,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, } /* all other widgets */ - in = is_connected_input_ep(w); - dapm_clear_walk(w->codec); - out = is_connected_output_ep(w); - dapm_clear_walk(w->codec); - power = (out != 0 && in != 0) ? 1 : 0; + power = dapm_generic_check_power(w); power_change = (w->power == power) ? 0 : 1; w->power = power; -- cgit v0.10.2 From 6ea31b9f0a0307e16656af27fcda3160e2a64a1b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Apr 2009 17:15:41 +0100 Subject: ASoC: Factor out DAPM power checks for DACs and ADCs This also switches us to using a switch statement for the widget type in dapm_power_widget(). Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 28e6e32..22522e2 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -594,6 +594,34 @@ static int dapm_generic_check_power(struct snd_soc_dapm_widget *w) return out != 0 && in != 0; } +/* Check to see if an ADC has power */ +static int dapm_adc_check_power(struct snd_soc_dapm_widget *w) +{ + int in; + + if (w->active) { + in = is_connected_input_ep(w); + dapm_clear_walk(w->codec); + return in != 0; + } else { + return dapm_generic_check_power(w); + } +} + +/* Check to see if a DAC has power */ +static int dapm_dac_check_power(struct snd_soc_dapm_widget *w) +{ + int out; + + if (w->active) { + out = is_connected_output_ep(w); + dapm_clear_walk(w->codec); + return out != 0; + } else { + return dapm_generic_check_power(w); + } +} + /* * Scan a single DAPM widget for a complete audio path and update the * power status appropriately. @@ -601,36 +629,23 @@ static int dapm_generic_check_power(struct snd_soc_dapm_widget *w) static int dapm_power_widget(struct snd_soc_codec *codec, int event, struct snd_soc_dapm_widget *w) { - int in, out, power_change, power, ret; + int power, ret; - /* vmid - no action */ - if (w->id == snd_soc_dapm_vmid) + /* Work out the new power state */ + switch (w->id) { + case snd_soc_dapm_vmid: + /* No action required */ return 0; - /* active ADC */ - if (w->id == snd_soc_dapm_adc && w->active) { - in = is_connected_input_ep(w); - dapm_clear_walk(w->codec); - power = (in != 0) ? 1 : 0; - if (power == w->power) - return 0; - w->power = power; - return dapm_generic_apply_power(w); - } + case snd_soc_dapm_adc: + power = dapm_adc_check_power(w); + break; - /* active DAC */ - if (w->id == snd_soc_dapm_dac && w->active) { - out = is_connected_output_ep(w); - dapm_clear_walk(w->codec); - power = (out != 0) ? 1 : 0; - if (power == w->power) - return 0; - w->power = power; - return dapm_generic_apply_power(w); - } + case snd_soc_dapm_dac: + power = dapm_dac_check_power(w); + break; - /* pre and post event widgets */ - if (w->id == snd_soc_dapm_pre) { + case snd_soc_dapm_pre: if (!w->event) return 0; @@ -646,8 +661,8 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, return ret; } return 0; - } - if (w->id == snd_soc_dapm_post) { + + case snd_soc_dapm_post: if (!w->event) return 0; @@ -663,15 +678,15 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, return ret; } return 0; - } - /* all other widgets */ - power = dapm_generic_check_power(w); - power_change = (w->power == power) ? 0 : 1; - w->power = power; + default: + power = dapm_generic_check_power(w); + break; + } - if (!power_change) + if (w->power == power) return 0; + w->power = power; return dapm_generic_apply_power(w); } -- cgit v0.10.2 From b75576d76d4be50196773f36709cb7a4f5ac2ab7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Apr 2009 17:56:13 +0100 Subject: ASoC: Make the DAPM power check an operation on the widget Rather than having switch statements at point of use make the DAPM power check a member of the widget structure and set it when we instantiate the widget. Signed-off-by: Mark Brown diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index fcc929d..839a97b 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -367,6 +367,8 @@ struct snd_soc_dapm_widget { unsigned char suspend:1; /* was active before suspend */ unsigned char pmdown:1; /* waiting for timeout */ + int (*power_check)(struct snd_soc_dapm_widget *w); + /* external events */ unsigned short event_flags; /* flags to specify event types */ int (*event)(struct snd_soc_dapm_widget*, struct snd_kcontrol *, int); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 22522e2..d3d1735 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -631,20 +631,7 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, { int power, ret; - /* Work out the new power state */ switch (w->id) { - case snd_soc_dapm_vmid: - /* No action required */ - return 0; - - case snd_soc_dapm_adc: - power = dapm_adc_check_power(w); - break; - - case snd_soc_dapm_dac: - power = dapm_dac_check_power(w); - break; - case snd_soc_dapm_pre: if (!w->event) return 0; @@ -680,10 +667,13 @@ static int dapm_power_widget(struct snd_soc_codec *codec, int event, return 0; default: - power = dapm_generic_check_power(w); break; } + if (!w->power_check) + return 0; + + power = w->power_check(w); if (w->power == power) return 0; w->power = power; @@ -1147,15 +1137,22 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec) case snd_soc_dapm_switch: case snd_soc_dapm_mixer: case snd_soc_dapm_mixer_named_ctl: + w->power_check = dapm_generic_check_power; dapm_new_mixer(codec, w); break; case snd_soc_dapm_mux: case snd_soc_dapm_value_mux: + w->power_check = dapm_generic_check_power; dapm_new_mux(codec, w); break; case snd_soc_dapm_adc: + w->power_check = dapm_adc_check_power; + break; case snd_soc_dapm_dac: + w->power_check = dapm_dac_check_power; + break; case snd_soc_dapm_pga: + w->power_check = dapm_generic_check_power; dapm_new_pga(codec, w); break; case snd_soc_dapm_input: @@ -1165,6 +1162,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec) case snd_soc_dapm_hp: case snd_soc_dapm_mic: case snd_soc_dapm_line: + w->power_check = dapm_generic_check_power; + break; case snd_soc_dapm_vmid: case snd_soc_dapm_pre: case snd_soc_dapm_post: -- cgit v0.10.2 From 17487bfeb6cfb05920e6a9d5a54f345f2917b4e7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 13:24:21 -0400 Subject: tracing: fix recursive test level calculation The recursive tests to detect same level recursion in the ring buffers did not account for the hard/softirq_counts to be shifted. Thus the numbers could be larger than then mask to be tested. This patch includes the shift for the calculation of the irq depth. [ Impact: stop false positives in trace recursion detection ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index e145969..aa40ae9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1483,7 +1483,9 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, static int trace_irq_level(void) { - return hardirq_count() + softirq_count() + in_nmi(); + return (hardirq_count() >> HARDIRQ_SHIFT) + + (softirq_count() >> + SOFTIRQ_SHIFT) + + !!in_nmi(); } static int trace_recursive_lock(void) -- cgit v0.10.2 From e395898e98119085f666febbc7b631dd69bc637f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 13:32:44 -0400 Subject: tracing: remove recursive test from ring_buffer_event_discard The ring_buffer_event_discard is not tied to ring_buffer_lock_reserve. It can be called inside or outside the reserve/commit. Even if it is called inside the reserve/commit the commit part must also be called. Only ring_buffer_discard_commit can be used as a replacement for ring_buffer_unlock_commit. This patch removes the trace_recursive_unlock from ring_buffer_event_discard since it would be the wrong place to do so. [Impact: prevent breakage in trace recursive testing ] Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index aa40ae9..a699767 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1667,7 +1667,6 @@ static inline void rb_event_discard(struct ring_buffer_event *event) void ring_buffer_event_discard(struct ring_buffer_event *event) { rb_event_discard(event); - trace_recursive_unlock(); } EXPORT_SYMBOL_GPL(ring_buffer_event_discard); -- cgit v0.10.2 From f1f9b3b1795da8625e0e6096813c9d18d4a344ce Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 20 Apr 2009 20:38:21 +0200 Subject: perfcounters, sched: remove __task_delta_exec() This function was left orphan by the latest round of sw-counter cleanups. [ Impact: remove unused kernel function ] Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 080d1fd..a77c600 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -85,7 +85,6 @@ static inline unsigned int kstat_irqs(unsigned int irq) /* * Lock/unlock the current runqueue - to extract task statistics: */ -extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); extern unsigned long long task_delta_exec(struct task_struct *); extern void account_user_time(struct task_struct *, cputime_t, cputime_t); diff --git a/kernel/sched.c b/kernel/sched.c index b66a08c..a69278e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4550,29 +4550,6 @@ EXPORT_PER_CPU_SYMBOL(kstat); * Return any ns on the sched_clock that have not yet been banked in * @p in case that task is currently running. */ -unsigned long long __task_delta_exec(struct task_struct *p, int update) -{ - s64 delta_exec; - struct rq *rq; - - rq = task_rq(p); - WARN_ON_ONCE(!runqueue_is_locked()); - WARN_ON_ONCE(!task_current(rq, p)); - - if (update) - update_rq_clock(rq); - - delta_exec = rq->clock - p->se.exec_start; - - WARN_ON_ONCE(delta_exec < 0); - - return delta_exec; -} - -/* - * Return any ns on the sched_clock that have not yet been banked in - * @p in case that task is currently running. - */ unsigned long long task_delta_exec(struct task_struct *p) { unsigned long flags; -- cgit v0.10.2 From ff743345bf7685a207868048a70e23164c4785e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 13 Mar 2009 12:21:26 +0100 Subject: sched: remove extra call overhead for schedule() Lai Jiangshan's patch reminded me that I promised Nick to remove that extra call overhead in schedule(). Signed-off-by: Peter Zijlstra LKML-Reference: <20090313112300.927414207@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/mutex.c b/kernel/mutex.c index 5d79781..e1fb735 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -248,7 +248,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, /* didnt get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); - __schedule(); + preempt_enable_no_resched(); + schedule(); + preempt_disable(); spin_lock_mutex(&lock->wait_lock, flags); } diff --git a/kernel/sched.c b/kernel/sched.c index 7601cee..797f6fd 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5131,13 +5131,15 @@ pick_next_task(struct rq *rq) /* * schedule() is the main scheduler function. */ -asmlinkage void __sched __schedule(void) +asmlinkage void __sched schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; struct rq *rq; int cpu; +need_resched: + preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); rcu_qsctr_inc(cpu); @@ -5194,15 +5196,9 @@ need_resched_nonpreemptible: if (unlikely(reacquire_kernel_lock(current) < 0)) goto need_resched_nonpreemptible; -} -asmlinkage void __sched schedule(void) -{ -need_resched: - preempt_disable(); - __schedule(); preempt_enable_no_resched(); - if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) + if (need_resched()) goto need_resched; } EXPORT_SYMBOL(schedule); -- cgit v0.10.2 From aa18efb2a2f07e1cf062039848e9d369bb358724 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 16:16:11 -0400 Subject: tracing: use recursive counter over irq level Althought using the irq level (hardirq_count, softirq_count and in_nmi) was nice to detect bad recursion right away, but since the counters are not atomically updated with respect to the interrupts, the function tracer might trigger the test from an interrupt handler before the hardirq_count is updated. This will trigger a false warning. This patch converts the recursive detection to a simple counter. If the depth is greater than 16 then the recursive detection will trigger. 16 is more than enough for any nested interrupts. [ Impact: fix false positive trace recursion detection ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a699767..7bcfd3e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1481,47 +1481,34 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, return event; } -static int trace_irq_level(void) -{ - return (hardirq_count() >> HARDIRQ_SHIFT) + - (softirq_count() >> + SOFTIRQ_SHIFT) + - !!in_nmi(); -} +#define TRACE_RECURSIVE_DEPTH 16 static int trace_recursive_lock(void) { - int level; - - level = trace_irq_level(); + current->trace_recursion++; - if (unlikely(current->trace_recursion & (1 << level))) { - /* Disable all tracing before we do anything else */ - tracing_off_permanent(); + if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) + return 0; - printk_once(KERN_WARNING "Tracing recursion: " - "HC[%lu]:SC[%lu]:NMI[%lu]\n", - hardirq_count() >> HARDIRQ_SHIFT, - softirq_count() >> SOFTIRQ_SHIFT, - in_nmi()); + /* Disable all tracing before we do anything else */ + tracing_off_permanent(); - WARN_ON_ONCE(1); - return -1; - } + printk_once(KERN_WARNING "Tracing recursion: depth[%d]:" + "HC[%lu]:SC[%lu]:NMI[%lu]\n", + current->trace_recursion, + hardirq_count() >> HARDIRQ_SHIFT, + softirq_count() >> SOFTIRQ_SHIFT, + in_nmi()); - current->trace_recursion |= 1 << level; - - return 0; + WARN_ON_ONCE(1); + return -1; } static void trace_recursive_unlock(void) { - int level; - - level = trace_irq_level(); - - WARN_ON_ONCE(!current->trace_recursion & (1 << level)); + WARN_ON_ONCE(!current->trace_recursion); - current->trace_recursion &= ~(1 << level); + current->trace_recursion--; } static DEFINE_PER_CPU(int, rb_need_resched); -- cgit v0.10.2 From cb4764a6dbffd9bb3cf759421ae82384071a933d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 20 Apr 2009 18:16:44 -0400 Subject: tracing: use nowakeup version of commit for function event trace tests The startup tests for the event tracer also runs with the function tracer enabled. The "wakeup" version of the trace commit was used which can grab spinlocks. If a task was preempted by an NMI that called a function being traced, it could deadlock due to the function tracer trying to grab the same lock. Thanks to Frederic Weisbecker for pointing out where the bug was. Reported-by: Ingo Molnar Reported-by: Frederic Weisbecker Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 98daf5d..672b195 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1164,7 +1164,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) entry->ip = ip; entry->parent_ip = parent_ip; - trace_current_buffer_unlock_commit(event, flags, pc); + trace_nowake_buffer_unlock_commit(event, flags, pc); out: atomic_dec(&per_cpu(test_event_disable, cpu)); -- cgit v0.10.2 From 2ecf0a57c60dcb588f310d94412118e15c510532 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Apr 2009 12:16:56 +0900 Subject: ide-dma: don't reset request fields on dma_timeout_retry() Impact: drop unnecessary code Now that everything uses bio and block operations, there is no need to reset request fields manually when retrying a request. Every field is guaranteed to be always valid. Drop unnecessary request field resetting from ide_dma_timeout_retry(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index a0b8cab1..d9123ec 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) /* * un-busy drive etc and make sure request is sane */ - rq = hwif->rq; - if (!rq) - goto out; - - hwif->rq = NULL; - - rq->errors = 0; - - if (!rq->bio) - goto out; - - rq->sector = rq->bio->bi_sector; - rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9; - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->buffer = bio_data(rq->bio); -out: + if (rq) { + hwif->rq = NULL; + rq->errors = 0; + } return ret; } -- cgit v0.10.2 From 37fc334cc8eb84f5fe0a5a1cbe6a6a68049e142a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Apr 2009 13:27:16 +0800 Subject: crypto: api - Fix algorithm module auto-loading The commit a760a6656e6f00bb0144a42a048cf0266646e22c (crypto: api - Fix module load deadlock with fallback algorithms) broke the auto-loading of algorithms that require fallbacks. The problem is that the fallback mask check is missing an and which cauess bits that should be considered to interfere with the result. Reported-by: Chuck Ebbert Signed-off-by: Herbert Xu diff --git a/crypto/api.c b/crypto/api.c index 314dab9..fd2545d 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -221,7 +221,8 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) request_module(name); - if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask) && + if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask & + CRYPTO_ALG_NEED_FALLBACK) && snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp)) request_module(tmp); -- cgit v0.10.2 From acd246b7494c629aa617da49716409566cf52149 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 21 Apr 2009 13:55:20 +0800 Subject: crypto: padlock - Revert aes-all alias to aes Since the padlock-aes driver doesn't require a fallback (it's only padlock-sha that does), it should use the aes alias rather than aes-all so that ones that do need a fallback can use it. Signed-off-by: Herbert Xu diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 3f0fdd1..856b3cc 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -489,4 +489,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algorithm support"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("aes-all"); +MODULE_ALIAS("aes"); -- cgit v0.10.2 From 6e29ec5701e9d44fa02b96c1c5c45f7516182b65 Mon Sep 17 00:00:00 2001 From: Gautham R Shenoy Date: Tue, 21 Apr 2009 08:40:49 +0530 Subject: sched: Replace first_cpu() with cpumask_first() in ILB nomination code Stephen Rothwell reported this build warning: > kernel/sched.c: In function 'find_new_ilb': > kernel/sched.c:4355: warning: passing argument 1 of '__first_cpu' from incompatible pointer type > > Possibly caused by commit f711f6090a81cbd396b63de90f415d33f563af9b > ("sched: Nominate idle load balancer from a semi-idle package") from > the sched tree. Should this call to first_cpu be cpumask_first? For !(CONFIG_SCHED_MC || CONFIG_SCHED_SMT), find_new_ilb() nominates the Idle load balancer as the first cpu from the nohz.cpu_mask. This code uses the older API first_cpu(). Replace it with cpumask_first(), which is the correct API here. [ Impact: cleanup, address build warning ] Reported-by: Stephen Rothwell Signed-off-by: Gautham R Shenoy Cc: Rusty Russell LKML-Reference: <20090421031049.GA4140@in.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 797f6fd..54d67b9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4356,7 +4356,7 @@ out_done: #else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */ static inline int find_new_ilb(int call_cpu) { - return first_cpu(nohz.cpu_mask); + return cpumask_first(nohz.cpu_mask); } #endif -- cgit v0.10.2 From cd474f2d548af3c0eb932d9d47ec11483861aa6f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 08:53:08 +0200 Subject: ALSA: Remove deprecated snd_card_new() Signed-off-by: Takashi Iwai diff --git a/include/sound/core.h b/include/sound/core.h index 3dea798..a26bbdc 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -300,16 +300,6 @@ int snd_card_create(int idx, const char *id, struct module *module, int extra_size, struct snd_card **card_ret); -static inline __deprecated -struct snd_card *snd_card_new(int idx, const char *id, - struct module *module, int extra_size) -{ - struct snd_card *card; - if (snd_card_create(idx, id, module, extra_size, &card) < 0) - return NULL; - return card; -} - int snd_card_disconnect(struct snd_card *card); int snd_card_free(struct snd_card *card); int snd_card_free_when_closed(struct snd_card *card); -- cgit v0.10.2 From ef9dfa4b1052af23a603de382d4665b2d1fccc61 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 08:53:41 +0200 Subject: ALSA: Remove deprecated include/sound/driver.h Signed-off-by: Takashi Iwai diff --git a/include/sound/driver.h b/include/sound/driver.h deleted file mode 100644 index f035943..0000000 --- a/include/sound/driver.h +++ /dev/null @@ -1 +0,0 @@ -#warning "This file is deprecated" -- cgit v0.10.2 From fc1edaf9e7cc4d4696f83dee495b8f158d01c4eb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 20 Apr 2009 13:02:27 -0700 Subject: x86: x2apic, IR: Clean up X86_X2APIC and INTR_REMAP config checks Add x2apic_supported() to clean up CONFIG_X86_X2APIC checks. Fix CONFIG_INTR_REMAP checks. [ Impact: cleanup ] Signed-off-by: Suresh Siddha Cc: dwmw2@infradead.org Cc: Suresh Siddha Cc: Weidong Han LKML-Reference: <20090420200450.128993000@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index fbdd654..3738438 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); -#define EIM_8BIT_APIC_ID 0 -#define EIM_32BIT_APIC_ID 1 +extern int x2apic_mode; #ifdef CONFIG_X86_X2APIC /* @@ -166,7 +165,7 @@ static inline u64 native_x2apic_icr_read(void) return val; } -extern int x2apic, x2apic_phys; +extern int x2apic_phys; extern void check_x2apic(void); extern void enable_x2apic(void); extern void x2apic_icr_write(u32 low, u32 id); @@ -182,6 +181,8 @@ static inline int x2apic_enabled(void) return 1; return 0; } + +#define x2apic_supported() (cpu_has_x2apic) #else static inline void check_x2apic(void) { @@ -194,9 +195,8 @@ static inline int x2apic_enabled(void) return 0; } -#define x2apic 0 #define x2apic_preenabled 0 - +#define x2apic_supported() 0 #endif extern void enable_IR_x2apic(void); diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9d826e4..34eaa37 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -161,7 +161,6 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); -#ifdef CONFIG_X86_64 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); @@ -169,7 +168,6 @@ extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void reinit_intr_remapped_IO_APIC(int intr_remapping, struct IO_APIC_route_entry **ioapic_entries); -#endif extern void probe_nr_irqs_gsi(void); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0396760..f275e22 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -1,6 +1,6 @@ #ifndef _ASM_X86_IRQ_REMAPPING_H #define _ASM_X86_IRQ_REMAPPING_H -#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) +#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) #endif /* _ASM_X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7b41a32..2b30e52 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -134,8 +134,8 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif +int x2apic_mode; #ifdef CONFIG_X86_X2APIC -int x2apic; /* x2apic enabled before OS handover */ static int x2apic_preenabled; static int disable_x2apic; @@ -858,7 +858,7 @@ void clear_local_APIC(void) u32 v; /* APIC hasn't been mapped yet */ - if (!x2apic && !apic_phys) + if (!x2apic_mode && !apic_phys) return; maxlvt = lapic_get_maxlvt(); @@ -1330,7 +1330,7 @@ void check_x2apic(void) { if (x2apic_enabled()) { pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); - x2apic_preenabled = x2apic = 1; + x2apic_preenabled = x2apic_mode = 1; } } @@ -1338,7 +1338,7 @@ void enable_x2apic(void) { int msr, msr2; - if (!x2apic) + if (!x2apic_mode) return; rdmsr(MSR_IA32_APICBASE, msr, msr2); @@ -1390,25 +1390,17 @@ void __init enable_IR_x2apic(void) mask_IO_APIC_setup(ioapic_entries); mask_8259A(); -#ifdef CONFIG_X86_X2APIC - if (cpu_has_x2apic) - ret = enable_intr_remapping(EIM_32BIT_APIC_ID); - else -#endif - ret = enable_intr_remapping(EIM_8BIT_APIC_ID); - + ret = enable_intr_remapping(x2apic_supported()); if (ret) goto end_restore; pr_info("Enabled Interrupt-remapping\n"); -#ifdef CONFIG_X86_X2APIC - if (cpu_has_x2apic && !x2apic) { - x2apic = 1; + if (x2apic_supported() && !x2apic_mode) { + x2apic_mode = 1; enable_x2apic(); pr_info("Enabled x2apic\n"); } -#endif end_restore: if (ret) @@ -1576,7 +1568,7 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { - if (x2apic) { + if (x2apic_mode) { boot_cpu_physical_apicid = read_apic_id(); return; } @@ -2010,10 +2002,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); -#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) disable_intr_remapping(); -#endif + local_irq_restore(flags); return 0; } @@ -2023,8 +2015,6 @@ static int lapic_resume(struct sys_device *dev) unsigned int l, h; unsigned long flags; int maxlvt; - -#ifdef CONFIG_INTR_REMAP int ret; struct IO_APIC_route_entry **ioapic_entries = NULL; @@ -2050,17 +2040,8 @@ static int lapic_resume(struct sys_device *dev) mask_8259A(); } - if (x2apic) + if (x2apic_mode) enable_x2apic(); -#else - if (!apic_pm_state.active) - return 0; - - local_irq_save(flags); - if (x2apic) - enable_x2apic(); -#endif - else { /* * Make sure the APICBASE points to the right address @@ -2098,18 +2079,12 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); -#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) { - if (x2apic) - reenable_intr_remapping(EIM_32BIT_APIC_ID); - else - reenable_intr_remapping(EIM_8BIT_APIC_ID); - + reenable_intr_remapping(x2apic_mode); unmask_8259A(); restore_IO_APIC_setup(ioapic_entries); free_ioapic_entries(ioapic_entries); } -#endif local_irq_restore(flags); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ea22a86..3a45d2e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -736,7 +736,6 @@ static int __init ioapic_pirq_setup(char *str) __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ -#ifdef CONFIG_INTR_REMAP struct IO_APIC_route_entry **alloc_ioapic_entries(void) { int apic; @@ -857,7 +856,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) kfree(ioapic_entries); } -#endif /* * Find the IRQ entry number of a certain pin. diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 1783652..bc3e880 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = { void __init default_setup_apic_routing(void) { #ifdef CONFIG_X86_X2APIC - if (x2apic && (apic != &apic_x2apic_phys && + if (x2apic_mode && (apic != &apic_x2apic_phys && #ifdef CONFIG_X86_UV apic != &apic_x2apic_uv_x && #endif diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 06f592a..10ff5c4 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -158,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic) } #define irq_remapped(irq) (0) #define enable_intr_remapping(mode) (-1) +#define disable_intr_remapping() (0) +#define reenable_intr_remapping(mode) (0) #define intr_remapping_enabled (0) #endif -- cgit v0.10.2 From 25629d810a52176758401184d9b437fbb7f79195 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 20 Apr 2009 13:02:28 -0700 Subject: x86: x2apic, IR: Move eoi_ioapic_irq() into a CONFIG_INTR_REMAP section Address the following complier warning: arch/x86/kernel/apic/io_apic.c:2543: warning: `eoi_ioapic_irq' defined but not used By moving that function (and eoi_ioapic_irq()) into an existing #ifdef CONFIG_INTR_REMAP section of the code. [ Impact: cleanup ] Signed-off-by: Suresh Siddha Cc: dwmw2@infradead.org Cc: Weidong Han LKML-Reference: <20090420200450.271099000@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar Cc: Weidong Han diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3a45d2e..4baa9cb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2517,39 +2517,6 @@ static void irq_complete_move(struct irq_desc **descp) static inline void irq_complete_move(struct irq_desc **descp) {} #endif -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - int apic, pin; - struct irq_pin_list *entry; - - entry = cfg->irq_2_pin; - for (;;) { - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; - io_apic_eoi(apic, pin); - entry = entry->next; - } -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ack_apic_edge(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2659,6 +2626,39 @@ static void ack_apic_level(unsigned int irq) } #ifdef CONFIG_INTR_REMAP +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + int apic, pin; + struct irq_pin_list *entry; + + entry = cfg->irq_2_pin; + for (;;) { + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + io_apic_eoi(apic, pin); + entry = entry->next; + } +} + +static void +eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + static void ir_ack_apic_edge(unsigned int irq) { ack_APIC_irq(); -- cgit v0.10.2 From 39d83a5d684a457046aa2a6dac60f105966e78e9 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 20 Apr 2009 13:02:29 -0700 Subject: x86: x2apic, IR: Clean up panic() with nox2apic boot option Instead of panic() ignore the "nox2apic" boot option when BIOS has already enabled x2apic prior to OS handover. [ Impact: printk warning instead of panic() when BIOS has enabled x2apic already ] Signed-off-by: Suresh Siddha Cc: dwmw2@infradead.org Cc: Weidong Han LKML-Reference: <20090420200450.425091000@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2b30e52..d32f558 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -141,8 +141,12 @@ static int x2apic_preenabled; static int disable_x2apic; static __init int setup_nox2apic(char *str) { - if (x2apic_enabled()) - panic("Bios already enabled x2apic, can't enforce nox2apic"); + if (x2apic_enabled()) { + pr_warning("Bios already enabled x2apic, " + "can't enforce nox2apic"); + return 0; + } + disable_x2apic = 1; setup_clear_cpu_cap(X86_FEATURE_X2APIC); return 0; -- cgit v0.10.2 From 9d6c26e73bd248c286bb3597aaf788716e8fcceb Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 20 Apr 2009 13:02:31 -0700 Subject: x86: x2apic, IR: Make config X86_UV dependent on X86_X2APIC Instead of selecting X86_X2APIC, make config X86_UV dependent on X86_X2APIC. This will eliminate enabling CONFIG_X86_X2APIC with out enabling CONFIG_INTR_REMAP. [ Impact: cleanup ] Signed-off-by: Suresh Siddha Acked-by: Jack Steiner Cc: dwmw2@infradead.org Cc: Suresh Siddha Cc: Weidong Han LKML-Reference: <20090420200450.694598000@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c9086e6..58fb7b3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -354,7 +354,7 @@ config X86_UV depends on X86_64 depends on X86_EXTENDED_PLATFORM depends on NUMA - select X86_X2APIC + depends on X86_X2APIC ---help--- This option is needed in order to support SGI Ultraviolet systems. If you don't have one of these, you should say N here. -- cgit v0.10.2 From 89388913f2c88a2cd15d24abab571b17a2596127 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 21 Apr 2009 11:39:27 +0300 Subject: x86: unify noexec handling This patch unifies noexec handling on 32-bit and 64-bit. [ Impact: cleanup ] Signed-off-by: Pekka Enberg [ mingo@elte.hu: build fix ] LKML-Reference: <1240303167.771.69.camel@penberg-laptop> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b8238dc..4d258ad 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,7 +273,6 @@ typedef struct page *pgtable_t; extern pteval_t __supported_pte_mask; extern int nx_enabled; -extern void set_nx(void); #define pgprot_writecombine pgprot_writecombine extern pgprot_t pgprot_writecombine(pgprot_t prot); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fd3da1d..fedde53 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -22,6 +22,69 @@ int direct_gbpages #endif ; +int nx_enabled; + +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +static int disable_nx __cpuinitdata; + +/* + * noexec = on|off + * + * Control non-executable mappings for processes. + * + * on Enable + * off Disable + */ +static int __init noexec_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + disable_nx = 0; + } else if (!strncmp(str, "off", 3)) { + disable_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", noexec_setup); +#endif + +#ifdef CONFIG_X86_PAE +static void __init set_nx(void) +{ + unsigned int v[4], l, h; + + if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { + cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); + + if ((v[3] & (1 << 20)) && !disable_nx) { + rdmsr(MSR_EFER, l, h); + l |= EFER_NX; + wrmsr(MSR_EFER, l, h); + nx_enabled = 1; + __supported_pte_mask |= _PAGE_NX; + } + } +} +#else +static inline void set_nx(void) +{ +} +#endif + +#ifdef CONFIG_X86_64 +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || disable_nx) + __supported_pte_mask &= ~_PAGE_NX; +} +#endif + static void __init find_early_table_space(unsigned long end, int use_pse, int use_gbpages) { @@ -158,12 +221,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, use_gbpages = direct_gbpages; #endif -#ifdef CONFIG_X86_32 -#ifdef CONFIG_X86_PAE set_nx(); if (nx_enabled) printk(KERN_INFO "NX (Execute Disable) protection: active\n"); -#endif /* Enable PSE if available */ if (cpu_has_pse) @@ -174,7 +234,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, set_in_cr4(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; } -#endif if (use_gbpages) page_size_mask |= 1 << PG_LEVEL_1G; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 749559e..2b27120 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -587,61 +587,9 @@ void zap_low_mappings(void) flush_tlb_all(); } -int nx_enabled; - pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); EXPORT_SYMBOL_GPL(__supported_pte_mask); -#ifdef CONFIG_X86_PAE - -static int disable_nx __initdata; - -/* - * noexec = on|off - * - * Control non executable mappings. - * - * on Enable - * off Disable - */ -static int __init noexec_setup(char *str) -{ - if (!str || !strcmp(str, "on")) { - if (cpu_has_nx) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } - } else { - if (!strcmp(str, "off")) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } else { - return -EINVAL; - } - } - - return 0; -} -early_param("noexec", noexec_setup); - -void __init set_nx(void) -{ - unsigned int v[4], l, h; - - if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { - cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); - - if ((v[3] & (1 << 20)) && !disable_nx) { - rdmsr(MSR_EFER, l, h); - l |= EFER_NX; - wrmsr(MSR_EFER, l, h); - nx_enabled = 1; - __supported_pte_mask |= _PAGE_NX; - } - } -} -#endif - /* user-defined highmem size */ static unsigned int highmem_pages = -1; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1753e80..a4e7846 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -85,39 +85,6 @@ early_param("gbpages", parse_direct_gbpages_on); pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int disable_nx __cpuinitdata; - -/* - * noexec=on|off - * Control non-executable mappings for 64-bit processes. - * - * on Enable (default) - * off Disable - */ -static int __init nonx_setup(char *str) -{ - if (!str) - return -EINVAL; - if (!strncmp(str, "on", 2)) { - __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; - } else if (!strncmp(str, "off", 3)) { - disable_nx = 1; - __supported_pte_mask &= ~_PAGE_NX; - } - return 0; -} -early_param("noexec", nonx_setup); - -void __cpuinit check_efer(void) -{ - unsigned long efer; - - rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || disable_nx) - __supported_pte_mask &= ~_PAGE_NX; -} - int force_personality32; /* -- cgit v0.10.2 From 92c7c8a7d6e03eb4c0a3c5888e35dbc45f24744c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Mar 2009 07:32:14 +0100 Subject: ALSA: hda - Cache PCM and STREAM parameters queries Cache quries for PCM and STREAM parameters as well as ampcap and pincap sharing the hash table. This will reduce the superfluous access of the same codec verbs. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a4e5e59..3d8bf39 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -1053,6 +1053,8 @@ EXPORT_SYMBOL_HDA(snd_hda_codec_cleanup_stream); /* FIXME: more better hash key? */ #define HDA_HASH_KEY(nid,dir,idx) (u32)((nid) + ((idx) << 16) + ((dir) << 24)) #define HDA_HASH_PINCAP_KEY(nid) (u32)((nid) + (0x02 << 24)) +#define HDA_HASH_PARPCM_KEY(nid) (u32)((nid) + (0x03 << 24)) +#define HDA_HASH_PARSTR_KEY(nid) (u32)((nid) + (0x04 << 24)) #define INFO_AMP_CAPS (1<<0) #define INFO_AMP_VOL(ch) (1 << (1 + (ch))) @@ -1143,19 +1145,32 @@ int snd_hda_override_amp_caps(struct hda_codec *codec, hda_nid_t nid, int dir, } EXPORT_SYMBOL_HDA(snd_hda_override_amp_caps); -u32 snd_hda_query_pin_caps(struct hda_codec *codec, hda_nid_t nid) +static unsigned int +query_caps_hash(struct hda_codec *codec, hda_nid_t nid, u32 key, + unsigned int (*func)(struct hda_codec *, hda_nid_t)) { struct hda_amp_info *info; - info = get_alloc_amp_hash(codec, HDA_HASH_PINCAP_KEY(nid)); + info = get_alloc_amp_hash(codec, key); if (!info) return 0; if (!info->head.val) { - info->amp_caps = snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP); info->head.val |= INFO_AMP_CAPS; + info->amp_caps = func(codec, nid); } return info->amp_caps; } + +static unsigned int read_pin_cap(struct hda_codec *codec, hda_nid_t nid) +{ + return snd_hda_param_read(codec, nid, AC_PAR_PIN_CAP); +} + +u32 snd_hda_query_pin_caps(struct hda_codec *codec, hda_nid_t nid) +{ + return query_caps_hash(codec, nid, HDA_HASH_PINCAP_KEY(nid), + read_pin_cap); +} EXPORT_SYMBOL_HDA(snd_hda_query_pin_caps); /* @@ -2538,6 +2553,41 @@ unsigned int snd_hda_calc_stream_format(unsigned int rate, } EXPORT_SYMBOL_HDA(snd_hda_calc_stream_format); +static unsigned int get_pcm_param(struct hda_codec *codec, hda_nid_t nid) +{ + unsigned int val = 0; + if (nid != codec->afg && + (get_wcaps(codec, nid) & AC_WCAP_FORMAT_OVRD)) + val = snd_hda_param_read(codec, nid, AC_PAR_PCM); + if (!val || val == -1) + val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM); + if (!val || val == -1) + return 0; + return val; +} + +static unsigned int query_pcm_param(struct hda_codec *codec, hda_nid_t nid) +{ + return query_caps_hash(codec, nid, HDA_HASH_PARPCM_KEY(nid), + get_pcm_param); +} + +static unsigned int get_stream_param(struct hda_codec *codec, hda_nid_t nid) +{ + unsigned int streams = snd_hda_param_read(codec, nid, AC_PAR_STREAM); + if (!streams || streams == -1) + streams = snd_hda_param_read(codec, codec->afg, AC_PAR_STREAM); + if (!streams || streams == -1) + return 0; + return streams; +} + +static unsigned int query_stream_param(struct hda_codec *codec, hda_nid_t nid) +{ + return query_caps_hash(codec, nid, HDA_HASH_PARSTR_KEY(nid), + get_stream_param); +} + /** * snd_hda_query_supported_pcm - query the supported PCM rates and formats * @codec: the HDA codec @@ -2556,15 +2606,8 @@ static int snd_hda_query_supported_pcm(struct hda_codec *codec, hda_nid_t nid, { unsigned int i, val, wcaps; - val = 0; wcaps = get_wcaps(codec, nid); - if (nid != codec->afg && (wcaps & AC_WCAP_FORMAT_OVRD)) { - val = snd_hda_param_read(codec, nid, AC_PAR_PCM); - if (val == -1) - return -EIO; - } - if (!val) - val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM); + val = query_pcm_param(codec, nid); if (ratesp) { u32 rates = 0; @@ -2586,15 +2629,9 @@ static int snd_hda_query_supported_pcm(struct hda_codec *codec, hda_nid_t nid, u64 formats = 0; unsigned int streams, bps; - streams = snd_hda_param_read(codec, nid, AC_PAR_STREAM); - if (streams == -1) + streams = query_stream_param(codec, nid); + if (!streams) return -EIO; - if (!streams) { - streams = snd_hda_param_read(codec, codec->afg, - AC_PAR_STREAM); - if (streams == -1) - return -EIO; - } bps = 0; if (streams & AC_SUPFMT_PCM) { @@ -2668,17 +2705,9 @@ int snd_hda_is_supported_format(struct hda_codec *codec, hda_nid_t nid, int i; unsigned int val = 0, rate, stream; - if (nid != codec->afg && - (get_wcaps(codec, nid) & AC_WCAP_FORMAT_OVRD)) { - val = snd_hda_param_read(codec, nid, AC_PAR_PCM); - if (val == -1) - return 0; - } - if (!val) { - val = snd_hda_param_read(codec, codec->afg, AC_PAR_PCM); - if (val == -1) - return 0; - } + val = query_pcm_param(codec, nid); + if (!val) + return 0; rate = format & 0xff00; for (i = 0; i < AC_PAR_PCM_RATE_BITS; i++) @@ -2690,12 +2719,8 @@ int snd_hda_is_supported_format(struct hda_codec *codec, hda_nid_t nid, if (i >= AC_PAR_PCM_RATE_BITS) return 0; - stream = snd_hda_param_read(codec, nid, AC_PAR_STREAM); - if (stream == -1) - return 0; - if (!stream && nid != codec->afg) - stream = snd_hda_param_read(codec, codec->afg, AC_PAR_STREAM); - if (!stream || stream == -1) + stream = query_stream_param(codec, nid); + if (!stream) return 0; if (stream & AC_SUPFMT_PCM) { -- cgit v0.10.2 From b613291fb21a2d74eb8323d97fe9aa5d281b306c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Mar 2009 07:36:09 +0100 Subject: ALSA: hda - Retry codec-verbs at errors The current error-recovery scheme for the codec communication errors doesn't work always well. Especially falling back to the single-command mode causes the fatal problem on many systems. In this patch, the problematic verb is re-issued again after the error (even with polling mode) instead of the single-cmd mode. The single-cmd mode will be used only when specified via the command option explicitly, mainly just for testing. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 3d8bf39..1736ccb 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -174,14 +174,23 @@ unsigned int snd_hda_codec_read(struct hda_codec *codec, hda_nid_t nid, unsigned int verb, unsigned int parm) { struct hda_bus *bus = codec->bus; - unsigned int res; + unsigned int cmd, res; + int repeated = 0; - res = make_codec_cmd(codec, nid, direct, verb, parm); + cmd = make_codec_cmd(codec, nid, direct, verb, parm); snd_hda_power_up(codec); mutex_lock(&bus->cmd_mutex); - if (!bus->ops.command(bus, res)) + again: + if (!bus->ops.command(bus, cmd)) { res = bus->ops.get_response(bus); - else + if (res == -1 && bus->rirb_error) { + if (repeated++ < 1) { + snd_printd(KERN_WARNING "hda_codec: " + "Trying verb 0x%08x again\n", cmd); + goto again; + } + } + } else res = (unsigned int)-1; mutex_unlock(&bus->cmd_mutex); snd_hda_power_down(codec); diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 2fdecf4..cd8979c 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -623,6 +623,7 @@ struct hda_bus { /* misc op flags */ unsigned int needs_damn_long_delay :1; unsigned int shutdown :1; /* being unloaded */ + unsigned int rirb_error:1; /* error in codec communication */ }; /* diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 30829ee..803b720 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -604,6 +604,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus) } if (!chip->rirb.cmds) { smp_rmb(); + bus->rirb_error = 0; return chip->rirb.res; /* the last value */ } if (time_after(jiffies, timeout)) @@ -623,8 +624,10 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus) chip->irq = -1; pci_disable_msi(chip->pci); chip->msi = 0; - if (azx_acquire_irq(chip, 1) < 0) + if (azx_acquire_irq(chip, 1) < 0) { + bus->rirb_error = 1; return -1; + } goto again; } @@ -644,14 +647,12 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus) return -1; } - snd_printk(KERN_ERR "hda_intel: azx_get_response timeout, " - "switching to single_cmd mode: last cmd=0x%08x\n", - chip->last_cmd); - chip->rirb.rp = azx_readb(chip, RIRBWP); - chip->rirb.cmds = 0; - /* switch to single_cmd mode */ - chip->single_cmd = 1; - azx_free_cmd_io(chip); + snd_printk(KERN_ERR "hda_intel: azx_get_response timeout (ERROR): " + "last cmd=0x%08x\n", chip->last_cmd); + spin_lock_irq(&chip->reg_lock); + chip->rirb.cmds = 0; /* reset the index */ + bus->rirb_error = 1; + spin_unlock_irq(&chip->reg_lock); return -1; } -- cgit v0.10.2 From 586be3fcf97eec22fbc0ef6d67e823706aea7167 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 24 Mar 2009 07:43:24 +0100 Subject: ALSA: hda - Add debug prints for Realtek auto-init Added a couple of debug prints to show the checked id numbers in alc_subsystem_id(). Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8209779..ee92c73 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1022,6 +1022,9 @@ static void alc_subsystem_id(struct hda_codec *codec, if (codec->vendor_id == 0x10ec0260) nid = 0x17; ass = snd_hda_codec_get_pincfg(codec, nid); + snd_printd("realtek: No valid SSID, " + "checking pincfg 0x%08x for NID 0x%x\n", + nid, ass); if (!(ass & 1) && !(ass & 0x100000)) return; if ((ass >> 30) != 1) /* no physical connection */ @@ -1036,6 +1039,8 @@ static void alc_subsystem_id(struct hda_codec *codec, if (((ass >> 16) & 0xf) != tmp) return; do_sku: + snd_printd("realtek: Enabling init ASM_ID=0x%04x CODEC_ID=%08x\n", + ass & 0xffff, codec->vendor_id); /* * 0 : override * 1 : Swap Jack -- cgit v0.10.2 From e8082f3f5a17d7a7bfc7dd1050a3f958dc034e9a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 21 Apr 2009 17:11:46 +0800 Subject: tracing/filters: don't remove old filters when failed to write subsys->filter If writing subsys->filter returns EINVAL or ENOSPC, the original filters in subsys/ and subsys/events/ will be removed. This is definitely wrong. [ Impact: fix filter setting semantics on error condition ] Signed-off-by: Li Zefan Cc: Tom Zanussi Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <49ED8DD2.2070700@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 672b195..9ea55a7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -600,7 +600,6 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, err = filter_add_subsystem_pred(system, pred); if (err < 0) { - filter_free_subsystem_preds(system); filter_free_pred(pred); return err; } -- cgit v0.10.2 From f66578a7637b87810cbb9041c4e3a77fd2fa4706 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 21 Apr 2009 17:12:11 +0800 Subject: tracing/filters: allow user-input to be integer-like string Suppose we would like to trace all tasks named '123', but this will fail: # echo 'parent_comm == 123' > events/sched/sched_process_fork/filter bash: echo: write error: Invalid argument Don't guess the type of the filter pred in filter_parse(), but instead we check it in __filter_add_pred(). [ Impact: extend allowed filter field string values ] Signed-off-by: Li Zefan Cc: Tom Zanussi Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <49ED8DEB.6000700@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e0fcfd2..6541828 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -313,6 +313,7 @@ static int __filter_add_pred(struct ftrace_event_call *call, { struct ftrace_event_field *field; filter_pred_fn_t fn; + unsigned long long val; field = find_event_field(call, pred->field_name); if (!field) @@ -322,14 +323,13 @@ static int __filter_add_pred(struct ftrace_event_call *call, pred->offset = field->offset; if (is_string_field(field->type)) { - if (!pred->str_len) - return -EINVAL; fn = filter_pred_string; pred->str_len = field->size; return filter_add_pred_fn(call, pred, fn); } else { - if (pred->str_len) + if (strict_strtoull(pred->str_val, 0, &val)) return -EINVAL; + pred->val = val; } switch (field->size) { @@ -413,12 +413,16 @@ int filter_add_subsystem_pred(struct event_subsystem *system, return 0; } +/* + * The filter format can be + * - 0, which means remove all filter preds + * - [||/&&] ==/!= + */ int filter_parse(char **pbuf, struct filter_pred *pred) { - char *tmp, *tok, *val_str = NULL; + char *tok, *val_str = NULL; int tok_n = 0; - /* field ==/!= number, or/and field ==/!= number, number */ while ((tok = strsep(pbuf, " \n"))) { if (tok_n == 0) { if (!strcmp(tok, "0")) { @@ -478,19 +482,13 @@ int filter_parse(char **pbuf, struct filter_pred *pred) return -EINVAL; } + strcpy(pred->str_val, val_str); + pred->str_len = strlen(val_str); + pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); if (!pred->field_name) return -ENOMEM; - pred->str_len = 0; - pred->val = simple_strtoull(val_str, &tmp, 0); - if (tmp == val_str) { - strncpy(pred->str_val, val_str, MAX_FILTER_STR_VAL); - pred->str_len = strlen(val_str); - pred->str_val[pred->str_len] = '\0'; - } else if (*tmp != '\0') - return -EINVAL; - return 0; } -- cgit v0.10.2 From a3b48c88f2d5a34c0e25aec0a3dab8069e5a9a72 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 13:37:29 +0200 Subject: ALSA: hda - minor optimization in hda_set_power_state() Check the target power-state before checking EAPD exception to reduce unneeded verb executions. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index b649033..b91f6ed 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2348,7 +2348,8 @@ static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg, if (wcaps & AC_WCAP_POWER) { unsigned int wid_type = (wcaps & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT; - if (wid_type == AC_WID_PIN) { + if (power_state == AC_PWRST_D3 && + wid_type == AC_WID_PIN) { unsigned int pincap; /* * don't power down the widget if it controls @@ -2360,7 +2361,7 @@ static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg, nid, 0, AC_VERB_GET_EAPD_BTLENABLE, 0); eapd &= 0x02; - if (power_state == AC_PWRST_D3 && eapd) + if (eapd) continue; } } -- cgit v0.10.2 From 3554228d4289098a8fe5cfd87512ec32a19bbe5a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 21 Apr 2009 09:41:26 -0400 Subject: ring-buffer: only warn on wrap if buffer is bigger than two pages On boot up, to save memory, ftrace allocates the minimum buffer which is two pages. Ftrace also goes through a series of tests (when configured) on boot up. These tests can fill up a page within a single interrupt. The ring buffer also has a WARN_ON when it detects that the buffer was completely filled within a single commit (other commits are allowed to be nested). Combine the small buffer on start up, with the tests that can fill more than a single page within an interrupt, this can trigger the WARN_ON. This patch makes the WARN_ON only happen when the ring buffer consists of more than two pages. [ Impact: prevent false WARN_ON in ftrace startup tests ] Reported-by: Ingo Molnar LKML-Reference: <20090421094616.GA14561@elte.hu> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7bcfd3e..61dbdf2 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1241,7 +1241,8 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, * about it. */ if (unlikely(next_page == commit_page)) { - WARN_ON_ONCE(1); + /* This can easily happen on small ring buffers */ + WARN_ON_ONCE(buffer->pages > 2); goto out_reset; } -- cgit v0.10.2 From dfed0ef9b3ff9e37903920b6938ed33344ad0b3d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 21 Apr 2009 18:33:12 +0200 Subject: ALSA: hda - Fix a typo in debug print for realtek auto-detection The NID and ASS numbers were swapped... Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a6ec87a..8877120 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1026,7 +1026,7 @@ static void alc_subsystem_id(struct hda_codec *codec, ass = snd_hda_codec_get_pincfg(codec, nid); snd_printd("realtek: No valid SSID, " "checking pincfg 0x%08x for NID 0x%x\n", - nid, ass); + ass, nid); if (!(ass & 1) && !(ass & 0x100000)) return; if ((ass >> 30) != 1) /* no physical connection */ -- cgit v0.10.2 From 19bdc9d061bcb71efd2b53083d96b59bbe1a1751 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 17 Apr 2009 05:26:31 +0000 Subject: clocksource: sh_cmt clocksource support Add clocksource support to the sh_cmt driver. With this in place we can do tickless with a single CMT channel. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 02bae39..c247564 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -47,6 +47,7 @@ struct sh_cmt_priv { unsigned long rate; spinlock_t lock; struct clock_event_device ced; + struct clocksource cs; unsigned long total_cycles; }; @@ -376,6 +377,68 @@ static void sh_cmt_stop(struct sh_cmt_priv *p, unsigned long flag) spin_unlock_irqrestore(&p->lock, flags); } +static struct sh_cmt_priv *cs_to_sh_cmt(struct clocksource *cs) +{ + return container_of(cs, struct sh_cmt_priv, cs); +} + +static cycle_t sh_cmt_clocksource_read(struct clocksource *cs) +{ + struct sh_cmt_priv *p = cs_to_sh_cmt(cs); + unsigned long flags, raw; + unsigned long value; + int has_wrapped; + + spin_lock_irqsave(&p->lock, flags); + value = p->total_cycles; + raw = sh_cmt_get_counter(p, &has_wrapped); + + if (unlikely(has_wrapped)) + raw = p->match_value; + spin_unlock_irqrestore(&p->lock, flags); + + return value + raw; +} + +static int sh_cmt_clocksource_enable(struct clocksource *cs) +{ + struct sh_cmt_priv *p = cs_to_sh_cmt(cs); + int ret; + + p->total_cycles = 0; + + ret = sh_cmt_start(p, FLAG_CLOCKSOURCE); + if (ret) + return ret; + + /* TODO: calculate good shift from rate and counter bit width */ + cs->shift = 0; + cs->mult = clocksource_hz2mult(p->rate, cs->shift); + return 0; +} + +static void sh_cmt_clocksource_disable(struct clocksource *cs) +{ + sh_cmt_stop(cs_to_sh_cmt(cs), FLAG_CLOCKSOURCE); +} + +static int sh_cmt_register_clocksource(struct sh_cmt_priv *p, + char *name, unsigned long rating) +{ + struct clocksource *cs = &p->cs; + + cs->name = name; + cs->rating = rating; + cs->read = sh_cmt_clocksource_read; + cs->enable = sh_cmt_clocksource_enable; + cs->disable = sh_cmt_clocksource_disable; + cs->mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8); + cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; + pr_info("sh_cmt: %s used as clock source\n", cs->name); + clocksource_register(cs); + return 0; +} + static struct sh_cmt_priv *ced_to_sh_cmt(struct clock_event_device *ced) { return container_of(ced, struct sh_cmt_priv, ced); @@ -483,6 +546,9 @@ int sh_cmt_register(struct sh_cmt_priv *p, char *name, if (clockevent_rating) sh_cmt_register_clockevent(p, name, clockevent_rating); + if (clocksource_rating) + sh_cmt_register_clocksource(p, name, clocksource_rating); + return 0; } -- cgit v0.10.2 From 7a4f453b6d7379a7c380825949977c5a838aa012 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 22 Apr 2009 16:53:34 +0800 Subject: tracing/events: make struct trace_entry->type to be int type struct trace_entry->type is unsigned char, while trace event's id is int type, thus for a event with id >= 256, it's entry->type is cast to (id % 256), and then we can't see the trace output of this event. # insmod trace-events-sample.ko # echo foo_bar > /mnt/tracing/set_event # cat /debug/tracing/events/trace-events-sample/foo_bar/id 256 # cat /mnt/tracing/trace_pipe <...>-3548 [001] 215.091142: Unknown type 0 <...>-3548 [001] 216.089207: Unknown type 0 <...>-3548 [001] 217.087271: Unknown type 0 <...>-3548 [001] 218.085332: Unknown type 0 [ Impact: fix output for trace events with id >= 256 ] Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <49EEDB0E.5070207@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 75f3ac0..2a4a407 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,7 +16,7 @@ struct dentry; * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { - unsigned char type; + int type; unsigned char flags; unsigned char preempt_count; int pid; @@ -73,7 +73,7 @@ enum print_line_t { struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, unsigned long flags, int pc); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 39a3351..15ef08d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -198,7 +198,7 @@ ftrace_define_fields_##call(void) \ struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(unsigned char, type); \ + __common_field(int, type); \ __common_field(unsigned char, flags); \ __common_field(unsigned char, preempt_count); \ __common_field(int, pid); \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b9a3adc..b6183bc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -838,7 +838,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, } struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc) { @@ -881,7 +881,7 @@ void trace_buffer_unlock_commit(struct trace_array *tr, } struct ring_buffer_event * -trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, +trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc) { return trace_buffer_lock_reserve(&global_trace, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 247948e..7d55bcf 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -422,7 +422,7 @@ void init_tracer_sysprof_debugfs(struct dentry *d_tracer); struct ring_buffer_event; struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, - unsigned char type, + int type, unsigned long len, unsigned long flags, int pc); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9ea55a7..5d6e879 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", - FIELD(unsigned char, type), + FIELD(int, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), FIELD(int, pid), -- cgit v0.10.2 From ff166cb57a17124af75714a9c11f448f56f1a4a3 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 20 Apr 2009 13:02:30 -0700 Subject: x86: x2apic, IR: remove reinit_intr_remapped_IO_APIC() When interrupt-remapping is enabled, we are relying on setup_IO_APIC_irqs() to configure remapped entries in the IO-APIC, which comes little bit later after enabling interrupt-remapping. Meanwhile, restoration of old io-apic entries after enabling interrupt-remapping will not make the interrupts through io-apic functional anyway. So remove the unnecessary reinit_intr_remapped_IO_APIC() step. The longer story: When interrupt-remapping is enabled, IO-APIC entries need to be setup in the re-mappable format (pointing to interrupt-remapping table entries setup by the OS). This remapping configuration is happening in the same place where we traditionally configure IO-APIC (i.e., in setup_IO_APIC_irqs()). So when we enable interrupt-remapping successfully, there is no need to restore old io-apic RTE entries before we actually do a complete configuration shortly in setup_IO_APIC_irqs(). Old IO-APIC RTE's may be in traditional format (non re-mappable) or in re-mappable format pointing to interrupt-remapping table entries setup by BIOS. Restoring both of these will not make IO-APIC functional. We have to rely on setup_IO_APIC_irqs() for proper configuration by OS. So I am removing this unnecessary and broken step. [ Impact: remove unnecessary/broken IO-APIC setup step ] Signed-off-by: Suresh Siddha Acked-by: Weidong Han Cc: dwmw2@infradead.org LKML-Reference: <20090420200450.552359000@linux-os.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 34eaa37..1cf1450 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -166,8 +166,6 @@ extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); -extern void reinit_intr_remapped_IO_APIC(int intr_remapping, - struct IO_APIC_route_entry **ioapic_entries); extern void probe_nr_irqs_gsi(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d32f558..1386dbe 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1412,8 +1412,6 @@ end_restore: * IR enabling failed */ restore_IO_APIC_setup(ioapic_entries); - else - reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries); unmask_8259A(); local_irq_restore(flags); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4baa9cb..8aef5f9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -833,20 +833,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) return 0; } -void reinit_intr_remapped_IO_APIC(int intr_remapping, - struct IO_APIC_route_entry **ioapic_entries) - -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(ioapic_entries); -} - void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) { int apic; -- cgit v0.10.2 From 9cbf117662e24c6d33245666804487f92c21b59d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 04:51:29 +0200 Subject: tracing/events: provide string with undefined size support This patch provides the support for dynamic size strings on event tracing. The key concept is to use a structure with an ending char array field of undefined size and use such ability to allocate the minimal size on the ring buffer to make one or more string entries fit inside, as opposite to a fixed length strings with upper bound. The strings themselves are represented using fields which have an offset value from the beginning of the entry. This patch provides three new macros: __string(item, src) This one declares a string to the structure inside TP_STRUCT__entry. You need to provide the name of the string field and the source that will be copied inside. This will also add the dynamic size of the string needed for the ring buffer entry allocation. A stack allocated structure is used to temporarily store the offset of each strings, avoiding double calls to strlen() on each event insertion. __get_str(field) This one will give you a pointer to the string you have created. This is an abstract helper to resolve the absolute address given the field name which is a relative address from the beginning of the trace_structure. __assign_str(dst, src) Use this macro to automatically perform the string copy from src to dst. src must be a variable to assign and dst is the name of a __string field. Example on how to use it: TRACE_EVENT(my_event, TP_PROTO(char *src1, char *src2), TP_ARGS(src1, src2), TP_STRUCT__entry( __string(str1, src1) __string(str2, src2) ), TP_fast_assign( __assign_str(str1, src1); __assign_str(str2, src2); ), TP_printk("%s %s", __get_str(src1), __get_str(src2)) ) Of course you can mix-up any __field or __array inside this TRACE_EVENT. The position of the __string or __assign_str doesn't matter. Changes in v2: Address the suggestion of Steven Rostedt: drop the opening_string() macro and redefine __ending_string() to get the size of the string to be copied instead of overwritting the whole ring buffer allocation. Changes in v3: Address other suggestions of Steven Rostedt and Peter Zijlstra with some changes: drop the __ending_string and the need to have only one string field. Use offsets instead of absolute addresses. [ Impact: allow more compact memory usage for string tracing ] Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan Cc: Peter Zijlstra diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 15ef08d..5a7d18c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -27,6 +27,9 @@ #undef __field #define __field(type, item) type item; +#undef __string +#define __string(item, src) int __str_loc_##item; + #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args @@ -35,14 +38,53 @@ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ + char __str_data[0]; \ }; \ static struct ftrace_event_call event_##name #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + /* * Stage 2 of the trace events. * + * Include the following: + * + * struct ftrace_str_offsets_ { + * int ; + * int ; + * [...] + * }; + * + * The __string() macro will create each int , this is to + * keep the offset of each string from the beggining of the event + * once we perform the strlen() of the src strings. + * + */ + +#undef TRACE_FORMAT +#define TRACE_FORMAT(call, proto, args, fmt) + +#undef __array +#define __array(type, item, len) + +#undef __field +#define __field(type, item); + +#undef __string +#define __string(item, src) int item; + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + struct ftrace_str_offsets_##call { \ + tstruct; \ + }; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* + * Stage 3 of the trace events. + * * Override the macros in to include the following: * * enum print_line_t @@ -80,6 +122,9 @@ #undef TP_printk #define TP_printk(fmt, args...) fmt "\n", args +#undef __get_str +#define __get_str(field) (char *)__entry + __entry->__str_loc_##field + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ enum print_line_t \ @@ -146,6 +191,16 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ if (!ret) \ return 0; +#undef __string +#define __string(item, src) \ + ret = trace_seq_printf(s, "\tfield: __str_loc " #item ";\t" \ + "offset:%u;tsize:%u;\n", \ + (unsigned int)offsetof(typeof(field), \ + __str_loc_##item), \ + (unsigned int)sizeof(field.__str_loc_##item)); \ + if (!ret) \ + return 0; + #undef __entry #define __entry REC @@ -189,6 +244,12 @@ ftrace_format_##call(struct trace_seq *s) \ if (ret) \ return ret; +#undef __string +#define __string(item, src) \ + ret = trace_define_field(event_call, "__str_loc", #item, \ + offsetof(typeof(field), __str_loc_##item), \ + sizeof(field.__str_loc_##item)); + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ @@ -212,7 +273,7 @@ ftrace_define_fields_##call(void) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* - * Stage 3 of the trace events. + * Stage 4 of the trace events. * * Override the macros in to include the following: * @@ -409,6 +470,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __entry #define __entry entry +#undef __field +#define __field(type, item) + +#undef __array +#define __array(type, item, len) + +#undef __string +#define __string(item, src) \ + __str_offsets.item = __str_size + \ + offsetof(typeof(*entry), __str_data); \ + __str_size += strlen(src) + 1; + +#undef __assign_str +#define __assign_str(dst, src) \ + __entry->__str_loc_##dst = __str_offsets.dst; \ + strcpy(__get_str(dst), src); + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ _TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ @@ -417,18 +495,22 @@ static struct ftrace_event_call event_##call; \ \ static void ftrace_raw_event_##call(proto) \ { \ + struct ftrace_str_offsets_##call __maybe_unused __str_offsets; \ struct ftrace_event_call *call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ unsigned long irq_flags; \ + int __str_size = 0; \ int pc; \ \ local_save_flags(irq_flags); \ pc = preempt_count(); \ \ + tstruct; \ + \ event = trace_current_buffer_lock_reserve(event_##call.id, \ - sizeof(struct ftrace_raw_##call), \ - irq_flags, pc); \ + sizeof(struct ftrace_raw_##call) + __str_size,\ + irq_flags, pc); \ if (!event) \ return; \ entry = ring_buffer_event_data(event); \ -- cgit v0.10.2 From 7e7ca9a22dbbc5c91763cd16923c7509918709b6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 Apr 2009 04:54:49 +0200 Subject: tracing/lock: provide lock_acquired event support for dynamic size string Now that we can support the dynamic sized string, make the lock tracing able to use it, making it safe against modules removal and consuming the right amount of memory needed for each lock name Changes in v2: adapt to the __ending_string() updates and the opening_string() removal. [ Impact: protect lock tracer against module removal ] Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Steven Rostedt diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h index 45e326b..3ca315c 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lockdep.h @@ -38,16 +38,16 @@ TRACE_EVENT(lock_acquired, TP_ARGS(lock, ip, waittime), TP_STRUCT__entry( - __field(const char *, name) + __string(name, lock->name) __field(unsigned long, wait_usec) __field(unsigned long, wait_nsec_rem) ), TP_fast_assign( - __entry->name = lock->name; + __assign_str(name, lock->name); __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); __entry->wait_usec = (unsigned long) waittime; ), - TP_printk("%s (%lu.%03lu us)", __entry->name, __entry->wait_usec, + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, __entry->wait_nsec_rem) ); -- cgit v0.10.2 From 6a74aa40907757ec98d8710ff66cd4cfe064e7d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 22 Apr 2009 00:41:09 +0200 Subject: tracing/events: protect __get_str() The __get_str() macro is used in a code part then its content should be protected with parenthesis. [ Impact: make macro definition more robust ] Reported-by: Steven Rostedt Signed-off-by: Frederic Weisbecker diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 5a7d18c..a77f71a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -123,7 +123,7 @@ #define TP_printk(fmt, args...) fmt "\n", args #undef __get_str -#define __get_str(field) (char *)__entry + __entry->__str_loc_##field +#define __get_str(field) ((char *)__entry + __entry->__str_loc_##field) #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -- cgit v0.10.2 From 1b4246a1fc487370665bf6c55aac8eae379642c0 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 22 Apr 2009 10:56:50 +0900 Subject: ASoC: OMAP: Add checking to detect bufferless pcms Add checking in hw_params and prepare to detect bufferless pcms(i.e. BT <--> codec). Signed-off-by: Joonyoung Shim Signed-off-by: Mark Brown diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c index 07cf7f4..6454e15 100644 --- a/sound/soc/omap/omap-pcm.c +++ b/sound/soc/omap/omap-pcm.c @@ -87,8 +87,10 @@ static int omap_pcm_hw_params(struct snd_pcm_substream *substream, struct omap_pcm_dma_data *dma_data = rtd->dai->cpu_dai->dma_data; int err = 0; + /* return if this is a bufferless transfer e.g. + * codec <--> BT codec or GSM modem -- lg FIXME */ if (!dma_data) - return -ENODEV; + return 0; snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer); runtime->dma_bytes = params_buffer_bytes(params); @@ -134,6 +136,11 @@ static int omap_pcm_prepare(struct snd_pcm_substream *substream) struct omap_pcm_dma_data *dma_data = prtd->dma_data; struct omap_dma_channel_params dma_params; + /* return if this is a bufferless transfer e.g. + * codec <--> BT codec or GSM modem -- lg FIXME */ + if (!prtd->dma_data) + return 0; + memset(&dma_params, 0, sizeof(dma_params)); /* * Note: Regardless of interface data formats supported by OMAP McBSP -- cgit v0.10.2 From 246d0a17f5e09af0794960164269fc8988a8811c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 18:24:55 +0100 Subject: ASoC: Add power supply widget to DAPM Many modern CODECs have shared resources on chip which must be enabled for portions of the chip to work but which can be disabled at other times in order to achieve power savings. Examples of such resources include power supplies and some internal clocks. Since these widgets are dependencies for the audio path but do not carry audio signals they require slightly different handling to most widgets - they do not contribute to the audio path and so should not be counted as either inputs or outputs during path walks. Cases where one supply provides a supply for another will require additional work. There is also room for more optimisation of the graph walking to avoid repeated checks for the same thing. Signed-off-by: Mark Brown diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt index 9e67632..9ac842b 100644 --- a/Documentation/sound/alsa/soc/dapm.txt +++ b/Documentation/sound/alsa/soc/dapm.txt @@ -62,6 +62,7 @@ Audio DAPM widgets fall into a number of types:- o Mic - Mic (and optional Jack) o Line - Line Input/Output (and optional Jack) o Speaker - Speaker + o Supply - Power or clock supply widget used by other widgets. o Pre - Special PRE widget (exec before all others) o Post - Special POST widget (exec after all others) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 839a97b..533f9f2 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -154,12 +154,16 @@ .shift = wshift, .invert = winvert, \ .event = wevent, .event_flags = wflags} -/* generic register modifier widget */ +/* generic widgets */ #define SND_SOC_DAPM_REG(wid, wname, wreg, wshift, wmask, won_val, woff_val) \ { .id = wid, .name = wname, .kcontrols = NULL, .num_kcontrols = 0, \ .reg = -((wreg) + 1), .shift = wshift, .mask = wmask, \ .on_val = won_val, .off_val = woff_val, .event = dapm_reg_event, \ .event_flags = SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD} +#define SND_SOC_DAPM_SUPPLY(wname, wreg, wshift, winvert, wevent, wflags) \ +{ .id = snd_soc_dapm_supply, .name = wname, .reg = wreg, \ + .shift = wshift, .invert = winvert, .event = wevent, \ + .event_flags = wflags} /* dapm kcontrol types */ #define SOC_DAPM_SINGLE(xname, reg, shift, max, invert) \ @@ -308,6 +312,7 @@ enum snd_soc_dapm_type { snd_soc_dapm_vmid, /* codec bias/vmid - to minimise pops */ snd_soc_dapm_pre, /* machine specific pre widget - exec first */ snd_soc_dapm_post, /* machine specific post widget - exec last */ + snd_soc_dapm_supply, /* power/clock supply */ }; /* diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index d3d1735..7847f80 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -52,17 +52,19 @@ /* dapm power sequences - make this per codec in the future */ static int dapm_up_seq[] = { - snd_soc_dapm_pre, snd_soc_dapm_micbias, snd_soc_dapm_mic, - snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_dac, - snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_pga, - snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, snd_soc_dapm_post + snd_soc_dapm_pre, snd_soc_dapm_supply, snd_soc_dapm_micbias, + snd_soc_dapm_mic, snd_soc_dapm_mux, snd_soc_dapm_value_mux, + snd_soc_dapm_dac, snd_soc_dapm_mixer, snd_soc_dapm_mixer_named_ctl, + snd_soc_dapm_pga, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, + snd_soc_dapm_post }; static int dapm_down_seq[] = { snd_soc_dapm_pre, snd_soc_dapm_adc, snd_soc_dapm_hp, snd_soc_dapm_spk, snd_soc_dapm_pga, snd_soc_dapm_mixer_named_ctl, snd_soc_dapm_mixer, snd_soc_dapm_dac, snd_soc_dapm_mic, snd_soc_dapm_micbias, - snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_post + snd_soc_dapm_mux, snd_soc_dapm_value_mux, snd_soc_dapm_supply, + snd_soc_dapm_post }; static int dapm_status = 1; @@ -165,6 +167,7 @@ static void dapm_set_path_status(struct snd_soc_dapm_widget *w, case snd_soc_dapm_dac: case snd_soc_dapm_micbias: case snd_soc_dapm_vmid: + case snd_soc_dapm_supply: p->connect = 1; break; /* does effect routing - dynamically connected */ @@ -435,6 +438,9 @@ static int is_connected_output_ep(struct snd_soc_dapm_widget *widget) struct snd_soc_dapm_path *path; int con = 0; + if (widget->id == snd_soc_dapm_supply) + return 0; + if (widget->id == snd_soc_dapm_adc && widget->active) return 1; @@ -471,6 +477,9 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget) struct snd_soc_dapm_path *path; int con = 0; + if (widget->id == snd_soc_dapm_supply) + return 0; + /* active stream ? */ if (widget->id == snd_soc_dapm_dac && widget->active) return 1; @@ -622,6 +631,26 @@ static int dapm_dac_check_power(struct snd_soc_dapm_widget *w) } } +/* Check to see if a power supply is needed */ +static int dapm_supply_check_power(struct snd_soc_dapm_widget *w) +{ + struct snd_soc_dapm_path *path; + int power = 0; + + /* Check if one of our outputs is connected */ + list_for_each_entry(path, &w->sinks, list_source) { + if (path->sink && path->sink->power_check && + path->sink->power_check(path->sink)) { + power = 1; + break; + } + } + + dapm_clear_walk(w->codec); + + return power; +} + /* * Scan a single DAPM widget for a complete audio path and update the * power status appropriately. @@ -752,6 +781,7 @@ static void dbg_dump_dapm(struct snd_soc_codec* codec, const char *action) case snd_soc_dapm_pga: case snd_soc_dapm_mixer: case snd_soc_dapm_mixer_named_ctl: + case snd_soc_dapm_supply: if (w->name) { in = is_connected_input_ep(w); dapm_clear_walk(w->codec); @@ -880,6 +910,7 @@ static ssize_t dapm_widget_show(struct device *dev, case snd_soc_dapm_pga: case snd_soc_dapm_mixer: case snd_soc_dapm_mixer_named_ctl: + case snd_soc_dapm_supply: if (w->name) count += sprintf(buf + count, "%s: %s\n", w->name, w->power ? "On":"Off"); @@ -1044,6 +1075,7 @@ static int snd_soc_dapm_add_route(struct snd_soc_codec *codec, case snd_soc_dapm_vmid: case snd_soc_dapm_pre: case snd_soc_dapm_post: + case snd_soc_dapm_supply: list_add(&path->list, &codec->dapm_paths); list_add(&path->list_sink, &wsink->sources); list_add(&path->list_source, &wsource->sinks); @@ -1164,6 +1196,8 @@ int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec) case snd_soc_dapm_line: w->power_check = dapm_generic_check_power; break; + case snd_soc_dapm_supply: + w->power_check = dapm_supply_check_power; case snd_soc_dapm_vmid: case snd_soc_dapm_pre: case snd_soc_dapm_post: -- cgit v0.10.2 From 42768a12822c3a0a6d7db69445281db975938294 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 18:39:39 +0100 Subject: ASoC: Use DAPM supply widget for WM8903 charge pump Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index c539184..a3a489d 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -217,7 +217,6 @@ struct wm8903_priv { int sysclk; /* Reference counts */ - int charge_pump_users; int class_w_users; int playback_active; int capture_active; @@ -373,6 +372,15 @@ static void wm8903_reset(struct snd_soc_codec *codec) #define WM8903_OUTPUT_INT 0x2 #define WM8903_OUTPUT_IN 0x1 +static int wm8903_cp_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + WARN_ON(event != SND_SOC_DAPM_POST_PMU); + mdelay(4); + + return 0; +} + /* * Event for headphone and line out amplifier power changes. Special * power up/down sequences are required in order to maximise pop/click @@ -382,12 +390,9 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_codec *codec = w->codec; - struct wm8903_priv *wm8903 = codec->private_data; - struct i2c_client *i2c = codec->control_data; u16 val; u16 reg; int shift; - u16 cp_reg = wm8903_read(codec, WM8903_CHARGE_PUMP_0); switch (w->reg) { case WM8903_POWER_MANAGEMENT_2: @@ -419,18 +424,6 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w, /* Short the output */ val &= ~(WM8903_OUTPUT_SHORT << shift); wm8903_write(codec, reg, val); - - wm8903->charge_pump_users++; - - dev_dbg(&i2c->dev, "Charge pump use count now %d\n", - wm8903->charge_pump_users); - - if (wm8903->charge_pump_users == 1) { - dev_dbg(&i2c->dev, "Enabling charge pump\n"); - wm8903_write(codec, WM8903_CHARGE_PUMP_0, - cp_reg | WM8903_CP_ENA); - mdelay(4); - } } if (event & SND_SOC_DAPM_POST_PMU) { @@ -464,19 +457,6 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w, wm8903_write(codec, reg, val); } - if (event & SND_SOC_DAPM_POST_PMD) { - wm8903->charge_pump_users--; - - dev_dbg(&i2c->dev, "Charge pump use count now %d\n", - wm8903->charge_pump_users); - - if (wm8903->charge_pump_users == 0) { - dev_dbg(&i2c->dev, "Disabling charge pump\n"); - wm8903_write(codec, WM8903_CHARGE_PUMP_0, - cp_reg & ~WM8903_CP_ENA); - } - } - return 0; } @@ -844,26 +824,28 @@ SND_SOC_DAPM_MIXER("Right Speaker Mixer", WM8903_POWER_MANAGEMENT_4, 0, 0, SND_SOC_DAPM_PGA_E("Left Headphone Output PGA", WM8903_POWER_MANAGEMENT_2, 1, 0, NULL, 0, wm8903_output_event, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | - SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_PGA_E("Right Headphone Output PGA", WM8903_POWER_MANAGEMENT_2, 0, 0, NULL, 0, wm8903_output_event, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | - SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_PGA_E("Left Line Output PGA", WM8903_POWER_MANAGEMENT_3, 1, 0, NULL, 0, wm8903_output_event, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | - SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_PGA_E("Right Line Output PGA", WM8903_POWER_MANAGEMENT_3, 0, 0, NULL, 0, wm8903_output_event, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | - SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_PGA("Left Speaker PGA", WM8903_POWER_MANAGEMENT_5, 1, 0, NULL, 0), SND_SOC_DAPM_PGA("Right Speaker PGA", WM8903_POWER_MANAGEMENT_5, 0, 0, NULL, 0), +SND_SOC_DAPM_SUPPLY("Charge Pump", WM8903_CHARGE_PUMP_0, 0, 0, + wm8903_cp_event, SND_SOC_DAPM_POST_PMU), }; static const struct snd_soc_dapm_route intercon[] = { @@ -951,6 +933,11 @@ static const struct snd_soc_dapm_route intercon[] = { { "ROP", NULL, "Right Speaker PGA" }, { "RON", NULL, "Right Speaker PGA" }, + + { "Left Headphone Output PGA", NULL, "Charge Pump" }, + { "Right Headphone Output PGA", NULL, "Charge Pump" }, + { "Left Line Output PGA", NULL, "Charge Pump" }, + { "Right Line Output PGA", NULL, "Charge Pump" }, }; static int wm8903_add_widgets(struct snd_soc_codec *codec) -- cgit v0.10.2 From c2aef4ffd24dab5c8e94c66e4042ad39d38bcf39 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 20:04:44 +0100 Subject: ASoC: Support CLK_DSP in WM8903 CLK_DSP provides a master clock for the DAC and ADC related functionality on the device. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index a3a489d..27c8b94 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -846,6 +846,7 @@ SND_SOC_DAPM_PGA("Right Speaker PGA", WM8903_POWER_MANAGEMENT_5, 0, 0, SND_SOC_DAPM_SUPPLY("Charge Pump", WM8903_CHARGE_PUMP_0, 0, 0, wm8903_cp_event, SND_SOC_DAPM_POST_PMU), +SND_SOC_DAPM_SUPPLY("CLK_DSP", WM8903_CLOCK_RATES_2, 1, 0, NULL, 0), }; static const struct snd_soc_dapm_route intercon[] = { @@ -891,7 +892,12 @@ static const struct snd_soc_dapm_route intercon[] = { { "Right Input PGA", NULL, "Right Input Mode Mux" }, { "ADCL", NULL, "Left Input PGA" }, + { "ADCL", NULL, "CLK_DSP" }, { "ADCR", NULL, "Right Input PGA" }, + { "ADCR", NULL, "CLK_DSP" }, + + { "DACL", NULL, "CLK_DSP" }, + { "DACR", NULL, "CLK_DSP" }, { "Left Output Mixer", "Left Bypass Switch", "Left Input PGA" }, { "Left Output Mixer", "Right Bypass Switch", "Right Input PGA" }, -- cgit v0.10.2 From 4dbfe8097157fde1f8054f48f991ea45833852cd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 20:32:40 +0100 Subject: ASoC: Optimise configuration of WM8903 DC servo Modify the default startup sequence in the chip to set the DC servo dither level for optimal performance. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 27c8b94..de0a585 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -978,6 +978,11 @@ static int wm8903_set_bias_level(struct snd_soc_codec *codec, wm8903_write(codec, WM8903_CLOCK_RATES_2, WM8903_CLK_SYS_ENA); + /* Change DC servo dither level in startup sequence */ + wm8903_write(codec, WM8903_WRITE_SEQUENCER_0, 0x11); + wm8903_write(codec, WM8903_WRITE_SEQUENCER_1, 0x1257); + wm8903_write(codec, WM8903_WRITE_SEQUENCER_2, 0x2); + wm8903_run_sequence(codec, 0); wm8903_sync_reg_cache(codec, codec->reg_cache); -- cgit v0.10.2 From d7d5c5476a12333a33b7a14ebb10eccc729c01cb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 21:03:50 +0100 Subject: ASoC: Actively manage the DC servo for WM8903 Save a little extra power by enabling the DC servo offset correction for the output channels only when the relevant channels are enabled. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index de0a585..0bab5c6 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -392,14 +392,18 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w, struct snd_soc_codec *codec = w->codec; u16 val; u16 reg; + u16 dcs_reg; + u16 dcs_bit; int shift; switch (w->reg) { case WM8903_POWER_MANAGEMENT_2: reg = WM8903_ANALOGUE_HP_0; + dcs_bit = 0 + w->shift; break; case WM8903_POWER_MANAGEMENT_3: reg = WM8903_ANALOGUE_LINEOUT_0; + dcs_bit = 2 + w->shift; break; default: BUG(); @@ -439,6 +443,11 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w, val |= (WM8903_OUTPUT_OUT << shift); wm8903_write(codec, reg, val); + /* Enable the DC servo */ + dcs_reg = wm8903_read(codec, WM8903_DC_SERVO_0); + dcs_reg |= dcs_bit; + wm8903_write(codec, WM8903_DC_SERVO_0, dcs_reg); + /* Remove the short */ val |= (WM8903_OUTPUT_SHORT << shift); wm8903_write(codec, reg, val); @@ -451,6 +460,11 @@ static int wm8903_output_event(struct snd_soc_dapm_widget *w, val &= ~(WM8903_OUTPUT_SHORT << shift); wm8903_write(codec, reg, val); + /* Disable the DC servo */ + dcs_reg = wm8903_read(codec, WM8903_DC_SERVO_0); + dcs_reg &= ~dcs_bit; + wm8903_write(codec, WM8903_DC_SERVO_0, dcs_reg); + /* Then disable the intermediate and output stages */ val &= ~((WM8903_OUTPUT_OUT | WM8903_OUTPUT_INT | WM8903_OUTPUT_IN) << shift); -- cgit v0.10.2 From 727fb909e541ebd09d5b552afef02a147978c151 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 21:06:14 +0100 Subject: ASoC: Remove redundant rate constraint for WM8903 This is now handled by symmetric_rates. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 0bab5c6..bec418a 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1289,14 +1289,8 @@ static int wm8903_startup(struct snd_pcm_substream *substream, if (wm8903->master_substream) { master_runtime = wm8903->master_substream->runtime; - dev_dbg(&i2c->dev, "Constraining to %d bits at %dHz\n", - master_runtime->sample_bits, - master_runtime->rate); - - snd_pcm_hw_constraint_minmax(substream->runtime, - SNDRV_PCM_HW_PARAM_RATE, - master_runtime->rate, - master_runtime->rate); + dev_dbg(&i2c->dev, "Constraining to %d bits\n", + master_runtime->sample_bits); snd_pcm_hw_constraint_minmax(substream->runtime, SNDRV_PCM_HW_PARAM_SAMPLE_BITS, -- cgit v0.10.2 From 291ce18ceb84aca79368369885eec2d329ae16c5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 22 Apr 2009 21:36:14 +0100 Subject: ASoC: Implement WM8903 digital sidetone support Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index bec418a..d8a9222 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -533,6 +533,7 @@ static int wm8903_class_w_put(struct snd_kcontrol *kcontrol, /* ALSA can only do steps of .01dB */ static const DECLARE_TLV_DB_SCALE(digital_tlv, -7200, 75, 1); +static const DECLARE_TLV_DB_SCALE(digital_sidetone_tlv, -3600, 300, 0); static const DECLARE_TLV_DB_SCALE(out_tlv, -5700, 100, 0); static const DECLARE_TLV_DB_SCALE(drc_tlv_thresh, 0, 75, 0); @@ -651,6 +652,16 @@ static const struct soc_enum rinput_inv_enum = SOC_ENUM_SINGLE(WM8903_ANALOGUE_RIGHT_INPUT_1, 4, 3, rinput_mux_text); +static const char *sidetone_text[] = { + "None", "Left", "Right" +}; + +static const struct soc_enum lsidetone_enum = + SOC_ENUM_SINGLE(WM8903_DAC_DIGITAL_0, 2, 3, sidetone_text); + +static const struct soc_enum rsidetone_enum = + SOC_ENUM_SINGLE(WM8903_DAC_DIGITAL_0, 0, 3, sidetone_text); + static const struct snd_kcontrol_new wm8903_snd_controls[] = { /* Input PGAs - No TLV since the scale depends on PGA mode */ @@ -694,6 +705,9 @@ SOC_DOUBLE_R_TLV("Digital Capture Volume", WM8903_ADC_DIGITAL_VOLUME_LEFT, SOC_ENUM("ADC Companding Mode", adc_companding), SOC_SINGLE("ADC Companding Switch", WM8903_AUDIO_INTERFACE_0, 3, 1, 0), +SOC_DOUBLE_TLV("Digital Sidetone Volume", WM8903_DAC_DIGITAL_0, 4, 8, + 12, 0, digital_sidetone_tlv), + /* DAC */ SOC_DOUBLE_R_TLV("Digital Playback Volume", WM8903_DAC_DIGITAL_VOLUME_LEFT, WM8903_DAC_DIGITAL_VOLUME_RIGHT, 1, 120, 0, digital_tlv), @@ -756,6 +770,12 @@ static const struct snd_kcontrol_new rinput_mux = static const struct snd_kcontrol_new rinput_inv_mux = SOC_DAPM_ENUM("Right Inverting Input Mux", rinput_inv_enum); +static const struct snd_kcontrol_new lsidetone_mux = + SOC_DAPM_ENUM("DACL Sidetone Mux", lsidetone_enum); + +static const struct snd_kcontrol_new rsidetone_mux = + SOC_DAPM_ENUM("DACR Sidetone Mux", rsidetone_enum); + static const struct snd_kcontrol_new left_output_mixer[] = { SOC_DAPM_SINGLE("DACL Switch", WM8903_ANALOGUE_LEFT_MIX_0, 3, 1, 0), SOC_DAPM_SINGLE("DACR Switch", WM8903_ANALOGUE_LEFT_MIX_0, 2, 1, 0), @@ -822,6 +842,9 @@ SND_SOC_DAPM_PGA("Right Input PGA", WM8903_POWER_MANAGEMENT_0, 0, 0, NULL, 0), SND_SOC_DAPM_ADC("ADCL", "Left HiFi Capture", WM8903_POWER_MANAGEMENT_6, 1, 0), SND_SOC_DAPM_ADC("ADCR", "Right HiFi Capture", WM8903_POWER_MANAGEMENT_6, 0, 0), +SND_SOC_DAPM_MUX("DACL Sidetone", SND_SOC_NOPM, 0, 0, &lsidetone_mux), +SND_SOC_DAPM_MUX("DACR Sidetone", SND_SOC_NOPM, 0, 0, &rsidetone_mux), + SND_SOC_DAPM_DAC("DACL", "Left Playback", WM8903_POWER_MANAGEMENT_6, 3, 0), SND_SOC_DAPM_DAC("DACR", "Right Playback", WM8903_POWER_MANAGEMENT_6, 2, 0), @@ -910,7 +933,14 @@ static const struct snd_soc_dapm_route intercon[] = { { "ADCR", NULL, "Right Input PGA" }, { "ADCR", NULL, "CLK_DSP" }, + { "DACL Sidetone", "Left", "ADCL" }, + { "DACL Sidetone", "Right", "ADCR" }, + { "DACR Sidetone", "Left", "ADCL" }, + { "DACR Sidetone", "Right", "ADCR" }, + + { "DACL", NULL, "DACL Sidetone" }, { "DACL", NULL, "CLK_DSP" }, + { "DACR", NULL, "DACR Sidetone" }, { "DACR", NULL, "CLK_DSP" }, { "Left Output Mixer", "Left Bypass Switch", "Left Input PGA" }, -- cgit v0.10.2 From 818cf5909701806285d977f7a9365c5cadb062a7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 23 Apr 2009 09:58:22 +0300 Subject: slub: enforce MAX_ORDER slub_max_order may not be equal to or greater than MAX_ORDER. Additionally, if a single object cannot be placed in a slab of slub_max_order, it still must allocate slabs below MAX_ORDER. Acked-by: Christoph Lameter Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg diff --git a/mm/slub.c b/mm/slub.c index 7ab54ec..0e1247e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1909,7 +1909,7 @@ static inline int calculate_order(int size) * Doh this slab cannot be placed using slub_max_order. */ order = slab_order(size, 1, MAX_ORDER, 1); - if (order <= MAX_ORDER) + if (order < MAX_ORDER) return order; return -ENOSYS; } @@ -2522,6 +2522,7 @@ __setup("slub_min_order=", setup_slub_min_order); static int __init setup_slub_max_order(char *str) { get_option(&str, &slub_max_order); + slub_max_order = min(slub_max_order, MAX_ORDER - 1); return 1; } -- cgit v0.10.2 From 1a787e7ad242312af0afb2156596d42ee5e0c6bc Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Wed, 22 Apr 2009 13:13:34 +0900 Subject: ASoC: TWL4030: Add VDL path support Add DAPMs for VDL(Voice Down Link) path. To support VDL path, we have to change DAPMs of outputs(Earpiece, PreDrive Left/Right, Headset Left/Right, Carkit Left/Right) from mux to mixer. Signed-off-by: Joonyoung Shim Acked-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index cc2968c..fdf88df 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -321,104 +321,60 @@ static void twl4030_power_down(struct snd_soc_codec *codec) } /* Earpiece */ -static const char *twl4030_earpiece_texts[] = - {"Off", "DACL1", "DACL2", "DACR1"}; - -static const unsigned int twl4030_earpiece_values[] = - {0x0, 0x1, 0x2, 0x4}; - -static const struct soc_enum twl4030_earpiece_enum = - SOC_VALUE_ENUM_SINGLE(TWL4030_REG_EAR_CTL, 1, 0x7, - ARRAY_SIZE(twl4030_earpiece_texts), - twl4030_earpiece_texts, - twl4030_earpiece_values); - -static const struct snd_kcontrol_new twl4030_dapm_earpiece_control = -SOC_DAPM_VALUE_ENUM("Route", twl4030_earpiece_enum); +static const struct snd_kcontrol_new twl4030_dapm_earpiece_controls[] = { + SOC_DAPM_SINGLE("Voice", TWL4030_REG_EAR_CTL, 0, 1, 0), + SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_EAR_CTL, 1, 1, 0), + SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_EAR_CTL, 2, 1, 0), + SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_EAR_CTL, 3, 1, 0), +}; /* PreDrive Left */ -static const char *twl4030_predrivel_texts[] = - {"Off", "DACL1", "DACL2", "DACR2"}; - -static const unsigned int twl4030_predrivel_values[] = - {0x0, 0x1, 0x2, 0x4}; - -static const struct soc_enum twl4030_predrivel_enum = - SOC_VALUE_ENUM_SINGLE(TWL4030_REG_PREDL_CTL, 1, 0x7, - ARRAY_SIZE(twl4030_predrivel_texts), - twl4030_predrivel_texts, - twl4030_predrivel_values); - -static const struct snd_kcontrol_new twl4030_dapm_predrivel_control = -SOC_DAPM_VALUE_ENUM("Route", twl4030_predrivel_enum); +static const struct snd_kcontrol_new twl4030_dapm_predrivel_controls[] = { + SOC_DAPM_SINGLE("Voice", TWL4030_REG_PREDL_CTL, 0, 1, 0), + SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_PREDL_CTL, 1, 1, 0), + SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PREDL_CTL, 2, 1, 0), + SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PREDL_CTL, 3, 1, 0), +}; /* PreDrive Right */ -static const char *twl4030_predriver_texts[] = - {"Off", "DACR1", "DACR2", "DACL2"}; - -static const unsigned int twl4030_predriver_values[] = - {0x0, 0x1, 0x2, 0x4}; - -static const struct soc_enum twl4030_predriver_enum = - SOC_VALUE_ENUM_SINGLE(TWL4030_REG_PREDR_CTL, 1, 0x7, - ARRAY_SIZE(twl4030_predriver_texts), - twl4030_predriver_texts, - twl4030_predriver_values); - -static const struct snd_kcontrol_new twl4030_dapm_predriver_control = -SOC_DAPM_VALUE_ENUM("Route", twl4030_predriver_enum); +static const struct snd_kcontrol_new twl4030_dapm_predriver_controls[] = { + SOC_DAPM_SINGLE("Voice", TWL4030_REG_PREDR_CTL, 0, 1, 0), + SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_PREDR_CTL, 1, 1, 0), + SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PREDR_CTL, 2, 1, 0), + SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PREDR_CTL, 3, 1, 0), +}; /* Headset Left */ -static const char *twl4030_hsol_texts[] = - {"Off", "DACL1", "DACL2"}; - -static const struct soc_enum twl4030_hsol_enum = - SOC_ENUM_SINGLE(TWL4030_REG_HS_SEL, 1, - ARRAY_SIZE(twl4030_hsol_texts), - twl4030_hsol_texts); - -static const struct snd_kcontrol_new twl4030_dapm_hsol_control = -SOC_DAPM_ENUM("Route", twl4030_hsol_enum); +static const struct snd_kcontrol_new twl4030_dapm_hsol_controls[] = { + SOC_DAPM_SINGLE("Voice", TWL4030_REG_HS_SEL, 0, 1, 0), + SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_HS_SEL, 1, 1, 0), + SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_HS_SEL, 2, 1, 0), +}; /* Headset Right */ -static const char *twl4030_hsor_texts[] = - {"Off", "DACR1", "DACR2"}; - -static const struct soc_enum twl4030_hsor_enum = - SOC_ENUM_SINGLE(TWL4030_REG_HS_SEL, 4, - ARRAY_SIZE(twl4030_hsor_texts), - twl4030_hsor_texts); - -static const struct snd_kcontrol_new twl4030_dapm_hsor_control = -SOC_DAPM_ENUM("Route", twl4030_hsor_enum); +static const struct snd_kcontrol_new twl4030_dapm_hsor_controls[] = { + SOC_DAPM_SINGLE("Voice", TWL4030_REG_HS_SEL, 3, 1, 0), + SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_HS_SEL, 4, 1, 0), + SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_HS_SEL, 5, 1, 0), +}; /* Carkit Left */ -static const char *twl4030_carkitl_texts[] = - {"Off", "DACL1", "DACL2"}; - -static const struct soc_enum twl4030_carkitl_enum = - SOC_ENUM_SINGLE(TWL4030_REG_PRECKL_CTL, 1, - ARRAY_SIZE(twl4030_carkitl_texts), - twl4030_carkitl_texts); - -static const struct snd_kcontrol_new twl4030_dapm_carkitl_control = -SOC_DAPM_ENUM("Route", twl4030_carkitl_enum); +static const struct snd_kcontrol_new twl4030_dapm_carkitl_controls[] = { + SOC_DAPM_SINGLE("Voice", TWL4030_REG_PRECKL_CTL, 0, 1, 0), + SOC_DAPM_SINGLE("AudioL1", TWL4030_REG_PRECKL_CTL, 1, 1, 0), + SOC_DAPM_SINGLE("AudioL2", TWL4030_REG_PRECKL_CTL, 2, 1, 0), +}; /* Carkit Right */ -static const char *twl4030_carkitr_texts[] = - {"Off", "DACR1", "DACR2"}; - -static const struct soc_enum twl4030_carkitr_enum = - SOC_ENUM_SINGLE(TWL4030_REG_PRECKR_CTL, 1, - ARRAY_SIZE(twl4030_carkitr_texts), - twl4030_carkitr_texts); - -static const struct snd_kcontrol_new twl4030_dapm_carkitr_control = -SOC_DAPM_ENUM("Route", twl4030_carkitr_enum); +static const struct snd_kcontrol_new twl4030_dapm_carkitr_controls[] = { + SOC_DAPM_SINGLE("Voice", TWL4030_REG_PRECKR_CTL, 0, 1, 0), + SOC_DAPM_SINGLE("AudioR1", TWL4030_REG_PRECKR_CTL, 1, 1, 0), + SOC_DAPM_SINGLE("AudioR2", TWL4030_REG_PRECKR_CTL, 2, 1, 0), +}; /* Handsfree Left */ static const char *twl4030_handsfreel_texts[] = - {"Voice", "DACL1", "DACL2", "DACR2"}; + {"Voice", "AudioL1", "AudioL2", "AudioR2"}; static const struct soc_enum twl4030_handsfreel_enum = SOC_ENUM_SINGLE(TWL4030_REG_HFL_CTL, 0, @@ -430,7 +386,7 @@ SOC_DAPM_ENUM("Route", twl4030_handsfreel_enum); /* Handsfree Right */ static const char *twl4030_handsfreer_texts[] = - {"Voice", "DACR1", "DACR2", "DACL2"}; + {"Voice", "AudioR1", "AudioR2", "AudioL2"}; static const struct soc_enum twl4030_handsfreer_enum = SOC_ENUM_SINGLE(TWL4030_REG_HFR_CTL, 0, @@ -829,6 +785,12 @@ static DECLARE_TLV_DB_SCALE(digital_fine_tlv, -6300, 100, 1); static DECLARE_TLV_DB_SCALE(digital_coarse_tlv, 0, 600, 0); /* + * Voice Downlink GAIN volume control: + * from -37 to 12 dB in 1 dB steps (mute instead of -37 dB) + */ +static DECLARE_TLV_DB_SCALE(digital_voice_downlink_tlv, -3700, 100, 1); + +/* * Analog playback gain * -24 dB to 12 dB in 2 dB steps */ @@ -892,6 +854,16 @@ static const struct snd_kcontrol_new twl4030_snd_controls[] = { TWL4030_REG_ARXL2_APGA_CTL, TWL4030_REG_ARXR2_APGA_CTL, 1, 1, 0), + /* Common voice downlink gain controls */ + SOC_SINGLE_TLV("DAC Voice Digital Downlink Volume", + TWL4030_REG_VRXPGA, 0, 0x31, 0, digital_voice_downlink_tlv), + + SOC_SINGLE_TLV("DAC Voice Analog Downlink Volume", + TWL4030_REG_VDL_APGA_CTL, 3, 0x12, 1, analog_tlv), + + SOC_SINGLE("DAC Voice Analog Downlink Switch", + TWL4030_REG_VDL_APGA_CTL, 1, 1, 0), + /* Separate output gain controls */ SOC_DOUBLE_R_TLV_TWL4030("PreDriv Playback Volume", TWL4030_REG_PREDL_CTL, TWL4030_REG_PREDR_CTL, @@ -956,6 +928,8 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = { SND_SOC_NOPM, 0, 0), SND_SOC_DAPM_DAC("DAC Left2", "Left Rear Playback", SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_DAC("DAC Voice", "Voice Playback", + TWL4030_REG_AVDAC_CTL, 4, 0), /* Analog PGAs */ SND_SOC_DAPM_PGA("ARXR1_APGA", TWL4030_REG_ARXR1_APGA_CTL, @@ -966,6 +940,8 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = { 0, 0, NULL, 0), SND_SOC_DAPM_PGA("ARXL2_APGA", TWL4030_REG_ARXL2_APGA_CTL, 0, 0, NULL, 0), + SND_SOC_DAPM_PGA("VDL_APGA", TWL4030_REG_VDL_APGA_CTL, + 0, 0, NULL, 0), /* Analog bypasses */ SND_SOC_DAPM_SWITCH_E("Right1 Analog Loopback", SND_SOC_NOPM, 0, 0, @@ -998,26 +974,35 @@ static const struct snd_soc_dapm_widget twl4030_dapm_widgets[] = { SND_SOC_DAPM_MIXER("Analog L2 Playback Mixer", TWL4030_REG_AVDAC_CTL, 3, 0, NULL, 0), - /* Output MUX controls */ + /* Output MIXER controls */ /* Earpiece */ - SND_SOC_DAPM_VALUE_MUX("Earpiece Mux", SND_SOC_NOPM, 0, 0, - &twl4030_dapm_earpiece_control), + SND_SOC_DAPM_MIXER("Earpiece Mixer", SND_SOC_NOPM, 0, 0, + &twl4030_dapm_earpiece_controls[0], + ARRAY_SIZE(twl4030_dapm_earpiece_controls)), /* PreDrivL/R */ - SND_SOC_DAPM_VALUE_MUX("PredriveL Mux", SND_SOC_NOPM, 0, 0, - &twl4030_dapm_predrivel_control), - SND_SOC_DAPM_VALUE_MUX("PredriveR Mux", SND_SOC_NOPM, 0, 0, - &twl4030_dapm_predriver_control), + SND_SOC_DAPM_MIXER("PredriveL Mixer", SND_SOC_NOPM, 0, 0, + &twl4030_dapm_predrivel_controls[0], + ARRAY_SIZE(twl4030_dapm_predrivel_controls)), + SND_SOC_DAPM_MIXER("PredriveR Mixer", SND_SOC_NOPM, 0, 0, + &twl4030_dapm_predriver_controls[0], + ARRAY_SIZE(twl4030_dapm_predriver_controls)), /* HeadsetL/R */ - SND_SOC_DAPM_MUX_E("HeadsetL Mux", SND_SOC_NOPM, 0, 0, - &twl4030_dapm_hsol_control, headsetl_event, - SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), - SND_SOC_DAPM_MUX("HeadsetR Mux", SND_SOC_NOPM, 0, 0, - &twl4030_dapm_hsor_control), + SND_SOC_DAPM_MIXER_E("HeadsetL Mixer", SND_SOC_NOPM, 0, 0, + &twl4030_dapm_hsol_controls[0], + ARRAY_SIZE(twl4030_dapm_hsol_controls), headsetl_event, + SND_SOC_DAPM_POST_PMU|SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_MIXER("HeadsetR Mixer", SND_SOC_NOPM, 0, 0, + &twl4030_dapm_hsor_controls[0], + ARRAY_SIZE(twl4030_dapm_hsor_controls)), /* CarkitL/R */ - SND_SOC_DAPM_MUX("CarkitL Mux", SND_SOC_NOPM, 0, 0, - &twl4030_dapm_carkitl_control), - SND_SOC_DAPM_MUX("CarkitR Mux", SND_SOC_NOPM, 0, 0, - &twl4030_dapm_carkitr_control), + SND_SOC_DAPM_MIXER("CarkitL Mixer", SND_SOC_NOPM, 0, 0, + &twl4030_dapm_carkitl_controls[0], + ARRAY_SIZE(twl4030_dapm_carkitl_controls)), + SND_SOC_DAPM_MIXER("CarkitR Mixer", SND_SOC_NOPM, 0, 0, + &twl4030_dapm_carkitr_controls[0], + ARRAY_SIZE(twl4030_dapm_carkitr_controls)), + + /* Output MUX controls */ /* HandsfreeL/R */ SND_SOC_DAPM_MUX_E("HandsfreeL Mux", TWL4030_REG_HFL_CTL, 5, 0, &twl4030_dapm_handsfreel_control, handsfree_event, @@ -1082,50 +1067,61 @@ static const struct snd_soc_dapm_route intercon[] = { {"ARXL2_APGA", NULL, "Analog L2 Playback Mixer"}, {"ARXR2_APGA", NULL, "Analog R2 Playback Mixer"}, + {"VDL_APGA", NULL, "DAC Voice"}, + /* Internal playback routings */ /* Earpiece */ - {"Earpiece Mux", "DACL1", "ARXL1_APGA"}, - {"Earpiece Mux", "DACL2", "ARXL2_APGA"}, - {"Earpiece Mux", "DACR1", "ARXR1_APGA"}, + {"Earpiece Mixer", "Voice", "VDL_APGA"}, + {"Earpiece Mixer", "AudioL1", "ARXL1_APGA"}, + {"Earpiece Mixer", "AudioL2", "ARXL2_APGA"}, + {"Earpiece Mixer", "AudioR1", "ARXR1_APGA"}, /* PreDrivL */ - {"PredriveL Mux", "DACL1", "ARXL1_APGA"}, - {"PredriveL Mux", "DACL2", "ARXL2_APGA"}, - {"PredriveL Mux", "DACR2", "ARXR2_APGA"}, + {"PredriveL Mixer", "Voice", "VDL_APGA"}, + {"PredriveL Mixer", "AudioL1", "ARXL1_APGA"}, + {"PredriveL Mixer", "AudioL2", "ARXL2_APGA"}, + {"PredriveL Mixer", "AudioR2", "ARXR2_APGA"}, /* PreDrivR */ - {"PredriveR Mux", "DACR1", "ARXR1_APGA"}, - {"PredriveR Mux", "DACR2", "ARXR2_APGA"}, - {"PredriveR Mux", "DACL2", "ARXL2_APGA"}, + {"PredriveR Mixer", "Voice", "VDL_APGA"}, + {"PredriveR Mixer", "AudioR1", "ARXR1_APGA"}, + {"PredriveR Mixer", "AudioR2", "ARXR2_APGA"}, + {"PredriveR Mixer", "AudioL2", "ARXL2_APGA"}, /* HeadsetL */ - {"HeadsetL Mux", "DACL1", "ARXL1_APGA"}, - {"HeadsetL Mux", "DACL2", "ARXL2_APGA"}, + {"HeadsetL Mixer", "Voice", "VDL_APGA"}, + {"HeadsetL Mixer", "AudioL1", "ARXL1_APGA"}, + {"HeadsetL Mixer", "AudioL2", "ARXL2_APGA"}, /* HeadsetR */ - {"HeadsetR Mux", "DACR1", "ARXR1_APGA"}, - {"HeadsetR Mux", "DACR2", "ARXR2_APGA"}, + {"HeadsetR Mixer", "Voice", "VDL_APGA"}, + {"HeadsetR Mixer", "AudioR1", "ARXR1_APGA"}, + {"HeadsetR Mixer", "AudioR2", "ARXR2_APGA"}, /* CarkitL */ - {"CarkitL Mux", "DACL1", "ARXL1_APGA"}, - {"CarkitL Mux", "DACL2", "ARXL2_APGA"}, + {"CarkitL Mixer", "Voice", "VDL_APGA"}, + {"CarkitL Mixer", "AudioL1", "ARXL1_APGA"}, + {"CarkitL Mixer", "AudioL2", "ARXL2_APGA"}, /* CarkitR */ - {"CarkitR Mux", "DACR1", "ARXR1_APGA"}, - {"CarkitR Mux", "DACR2", "ARXR2_APGA"}, + {"CarkitR Mixer", "Voice", "VDL_APGA"}, + {"CarkitR Mixer", "AudioR1", "ARXR1_APGA"}, + {"CarkitR Mixer", "AudioR2", "ARXR2_APGA"}, /* HandsfreeL */ - {"HandsfreeL Mux", "DACL1", "ARXL1_APGA"}, - {"HandsfreeL Mux", "DACL2", "ARXL2_APGA"}, - {"HandsfreeL Mux", "DACR2", "ARXR2_APGA"}, + {"HandsfreeL Mux", "Voice", "VDL_APGA"}, + {"HandsfreeL Mux", "AudioL1", "ARXL1_APGA"}, + {"HandsfreeL Mux", "AudioL2", "ARXL2_APGA"}, + {"HandsfreeL Mux", "AudioR2", "ARXR2_APGA"}, /* HandsfreeR */ - {"HandsfreeR Mux", "DACR1", "ARXR1_APGA"}, - {"HandsfreeR Mux", "DACR2", "ARXR2_APGA"}, - {"HandsfreeR Mux", "DACL2", "ARXL2_APGA"}, + {"HandsfreeR Mux", "Voice", "VDL_APGA"}, + {"HandsfreeR Mux", "AudioR1", "ARXR1_APGA"}, + {"HandsfreeR Mux", "AudioR2", "ARXR2_APGA"}, + {"HandsfreeR Mux", "AudioL2", "ARXL2_APGA"}, /* outputs */ {"OUTL", NULL, "ARXL2_APGA"}, {"OUTR", NULL, "ARXR2_APGA"}, - {"EARPIECE", NULL, "Earpiece Mux"}, - {"PREDRIVEL", NULL, "PredriveL Mux"}, - {"PREDRIVER", NULL, "PredriveR Mux"}, - {"HSOL", NULL, "HeadsetL Mux"}, - {"HSOR", NULL, "HeadsetR Mux"}, - {"CARKITL", NULL, "CarkitL Mux"}, - {"CARKITR", NULL, "CarkitR Mux"}, + {"EARPIECE", NULL, "Earpiece Mixer"}, + {"PREDRIVEL", NULL, "PredriveL Mixer"}, + {"PREDRIVER", NULL, "PredriveR Mixer"}, + {"HSOL", NULL, "HeadsetL Mixer"}, + {"HSOR", NULL, "HeadsetR Mixer"}, + {"CARKITL", NULL, "CarkitL Mixer"}, + {"CARKITR", NULL, "CarkitR Mixer"}, {"HFL", NULL, "HandsfreeL Mux"}, {"HFR", NULL, "HandsfreeR Mux"}, -- cgit v0.10.2 From 2d7e71fa231035d69faffbfe506ef23638385994 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Thu, 23 Apr 2009 17:05:38 +0800 Subject: ASoC: simplify the SSP DMA parameters settings by run-time generation The SSP DMA parameters can actually be easily generated at run-time since they are almost similar except for the FIFO width and direction. Another benefit is the re-use of information from 'struct ssp_device', like SSDR physical FIFO address and DRCMR register index for both directions. Signed-off-by: Eric Miao Signed-off-by: Mark Brown Reviewed-by: pHilipp Zabel diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index b9b61dd..fb8cacc 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -50,139 +50,6 @@ struct ssp_priv { #endif }; -#define PXA2xx_SSP1_BASE 0x41000000 -#define PXA27x_SSP2_BASE 0x41700000 -#define PXA27x_SSP3_BASE 0x41900000 -#define PXA3xx_SSP4_BASE 0x41a00000 - -static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_mono_out = { - .name = "SSP1 PCM Mono out", - .dev_addr = PXA2xx_SSP1_BASE + SSDR, - .drcmr = &DRCMR(14), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_mono_in = { - .name = "SSP1 PCM Mono in", - .dev_addr = PXA2xx_SSP1_BASE + SSDR, - .drcmr = &DRCMR(13), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_stereo_out = { - .name = "SSP1 PCM Stereo out", - .dev_addr = PXA2xx_SSP1_BASE + SSDR, - .drcmr = &DRCMR(14), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH4, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp1_pcm_stereo_in = { - .name = "SSP1 PCM Stereo in", - .dev_addr = PXA2xx_SSP1_BASE + SSDR, - .drcmr = &DRCMR(13), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH4, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_mono_out = { - .name = "SSP2 PCM Mono out", - .dev_addr = PXA27x_SSP2_BASE + SSDR, - .drcmr = &DRCMR(16), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_mono_in = { - .name = "SSP2 PCM Mono in", - .dev_addr = PXA27x_SSP2_BASE + SSDR, - .drcmr = &DRCMR(15), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_stereo_out = { - .name = "SSP2 PCM Stereo out", - .dev_addr = PXA27x_SSP2_BASE + SSDR, - .drcmr = &DRCMR(16), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH4, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp2_pcm_stereo_in = { - .name = "SSP2 PCM Stereo in", - .dev_addr = PXA27x_SSP2_BASE + SSDR, - .drcmr = &DRCMR(15), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH4, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_mono_out = { - .name = "SSP3 PCM Mono out", - .dev_addr = PXA27x_SSP3_BASE + SSDR, - .drcmr = &DRCMR(67), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_mono_in = { - .name = "SSP3 PCM Mono in", - .dev_addr = PXA27x_SSP3_BASE + SSDR, - .drcmr = &DRCMR(66), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_stereo_out = { - .name = "SSP3 PCM Stereo out", - .dev_addr = PXA27x_SSP3_BASE + SSDR, - .drcmr = &DRCMR(67), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH4, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp3_pcm_stereo_in = { - .name = "SSP3 PCM Stereo in", - .dev_addr = PXA27x_SSP3_BASE + SSDR, - .drcmr = &DRCMR(66), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH4, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_mono_out = { - .name = "SSP4 PCM Mono out", - .dev_addr = PXA3xx_SSP4_BASE + SSDR, - .drcmr = &DRCMR(67), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_mono_in = { - .name = "SSP4 PCM Mono in", - .dev_addr = PXA3xx_SSP4_BASE + SSDR, - .drcmr = &DRCMR(66), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH2, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_stereo_out = { - .name = "SSP4 PCM Stereo out", - .dev_addr = PXA3xx_SSP4_BASE + SSDR, - .drcmr = &DRCMR(67), - .dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG | - DCMD_BURST16 | DCMD_WIDTH4, -}; - -static struct pxa2xx_pcm_dma_params pxa_ssp4_pcm_stereo_in = { - .name = "SSP4 PCM Stereo in", - .dev_addr = PXA3xx_SSP4_BASE + SSDR, - .drcmr = &DRCMR(66), - .dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC | - DCMD_BURST16 | DCMD_WIDTH4, -}; - static void dump_registers(struct ssp_device *ssp) { dev_dbg(&ssp->pdev->dev, "SSCR0 0x%08x SSCR1 0x%08x SSTO 0x%08x\n", @@ -194,25 +61,33 @@ static void dump_registers(struct ssp_device *ssp) ssp_read_reg(ssp, SSACD)); } -static struct pxa2xx_pcm_dma_params *ssp_dma_params[4][4] = { - { - &pxa_ssp1_pcm_mono_out, &pxa_ssp1_pcm_mono_in, - &pxa_ssp1_pcm_stereo_out, &pxa_ssp1_pcm_stereo_in, - }, - { - &pxa_ssp2_pcm_mono_out, &pxa_ssp2_pcm_mono_in, - &pxa_ssp2_pcm_stereo_out, &pxa_ssp2_pcm_stereo_in, - }, - { - &pxa_ssp3_pcm_mono_out, &pxa_ssp3_pcm_mono_in, - &pxa_ssp3_pcm_stereo_out, &pxa_ssp3_pcm_stereo_in, - }, - { - &pxa_ssp4_pcm_mono_out, &pxa_ssp4_pcm_mono_in, - &pxa_ssp4_pcm_stereo_out, &pxa_ssp4_pcm_stereo_in, - }, +struct pxa2xx_pcm_dma_data { + struct pxa2xx_pcm_dma_params params; + char name[20]; }; +static struct pxa2xx_pcm_dma_params * +ssp_get_dma_params(struct ssp_device *ssp, int stereo, int out) +{ + struct pxa2xx_pcm_dma_data *dma; + + dma = kzalloc(sizeof(struct pxa2xx_pcm_dma_data), GFP_KERNEL); + if (dma == NULL) + return NULL; + + snprintf(dma->name, 20, "SSP%d PCM %s %s", ssp->port_id, + stereo ? "Stereo" : "Mono", out ? "out" : "in"); + + dma->params.name = dma->name; + dma->params.drcmr = &DRCMR(out ? ssp->drcmr_tx : ssp->drcmr_rx); + dma->params.dcmd = (out ? (DCMD_INCSRCADDR | DCMD_FLOWTRG) : + (DCMD_INCTRGADDR | DCMD_FLOWSRC)) | + (stereo ? DCMD_WIDTH4 : DCMD_WIDTH2) | DCMD_BURST16; + dma->params.dev_addr = ssp->phys_base + SSDR; + + return &dma->params; +} + static int pxa_ssp_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -227,6 +102,11 @@ static int pxa_ssp_startup(struct snd_pcm_substream *substream, clk_enable(priv->dev.ssp->clk); ssp_disable(&priv->dev); } + + if (cpu_dai->dma_data) { + kfree(cpu_dai->dma_data); + cpu_dai->dma_data = NULL; + } return ret; } @@ -241,6 +121,11 @@ static void pxa_ssp_shutdown(struct snd_pcm_substream *substream, ssp_disable(&priv->dev); clk_disable(priv->dev.ssp->clk); } + + if (cpu_dai->dma_data) { + kfree(cpu_dai->dma_data); + cpu_dai->dma_data = NULL; + } } #ifdef CONFIG_PM @@ -653,25 +538,23 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; struct ssp_priv *priv = cpu_dai->private_data; struct ssp_device *ssp = priv->dev.ssp; - int dma = 0, chn = params_channels(params); + int chn = params_channels(params); u32 sscr0; u32 sspsp; int width = snd_pcm_format_physical_width(params_format(params)); int ttsa = ssp_read_reg(ssp, SSTSA) & 0xf; - /* select correct DMA params */ - if (substream->stream != SNDRV_PCM_STREAM_PLAYBACK) - dma = 1; /* capture DMA offset is 1,3 */ + /* generate correct DMA params */ + if (cpu_dai->dma_data) + kfree(cpu_dai->dma_data); + /* Network mode with one active slot (ttsa == 1) can be used * to force 16-bit frame width on the wire (for S16_LE), even * with two channels. Use 16-bit DMA transfers for this case. */ - if (((chn == 2) && (ttsa != 1)) || (width == 32)) - dma += 2; /* 32-bit DMA offset is 2, 16-bit is 0 */ - - cpu_dai->dma_data = ssp_dma_params[cpu_dai->id][dma]; - - dev_dbg(&ssp->pdev->dev, "pxa_ssp_hw_params: dma %d\n", dma); + cpu_dai->dma_data = ssp_get_dma_params(ssp, + ((chn == 2) && (ttsa != 1)) || (width == 32), + substream->stream == SNDRV_PCM_STREAM_PLAYBACK); /* we can only change the settings if the port is not in use */ if (ssp_read_reg(ssp, SSCR0) & SSCR0_SSE) -- cgit v0.10.2 From 8eb9feabe566d8272510d5fb33f55a72e3ab3ce4 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Thu, 23 Apr 2009 17:57:46 +0800 Subject: ASoC: change stereo/mono to 32-bit/16-bit for pxa-ssp The original idea came from pHilipp, and this makes the code looks more consistent. Signed-off-by: Eric Miao Signed-off-by: Mark Brown diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index fb8cacc..6fc7876 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -67,7 +67,7 @@ struct pxa2xx_pcm_dma_data { }; static struct pxa2xx_pcm_dma_params * -ssp_get_dma_params(struct ssp_device *ssp, int stereo, int out) +ssp_get_dma_params(struct ssp_device *ssp, int width4, int out) { struct pxa2xx_pcm_dma_data *dma; @@ -76,13 +76,13 @@ ssp_get_dma_params(struct ssp_device *ssp, int stereo, int out) return NULL; snprintf(dma->name, 20, "SSP%d PCM %s %s", ssp->port_id, - stereo ? "Stereo" : "Mono", out ? "out" : "in"); + width4 ? "32-bit" : "16-bit", out ? "out" : "in"); dma->params.name = dma->name; dma->params.drcmr = &DRCMR(out ? ssp->drcmr_tx : ssp->drcmr_rx); dma->params.dcmd = (out ? (DCMD_INCSRCADDR | DCMD_FLOWTRG) : (DCMD_INCTRGADDR | DCMD_FLOWSRC)) | - (stereo ? DCMD_WIDTH4 : DCMD_WIDTH2) | DCMD_BURST16; + (width4 ? DCMD_WIDTH4 : DCMD_WIDTH2) | DCMD_BURST16; dma->params.dev_addr = ssp->phys_base + SSDR; return &dma->params; -- cgit v0.10.2 From 31a00c6b7c0c4f01be49f02660de920c8b82b613 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 23 Apr 2009 14:36:48 +0300 Subject: ASoC: OMAP: Add 4 channel support to mcbsp Add 4 channel support to omap-mcbsp. This mode is going to be used by the twl4030 codec, when it is configured in Option1 mode. Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 495192a..a5d46a7 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -259,6 +259,7 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, regs->xcr2 |= XFRLEN2(wpf - 1); } case 1: + case 4: /* Set word per (McBSP) frame for phase1 */ regs->rcr1 |= RFRLEN1(wpf - 1); regs->xcr1 |= XFRLEN1(wpf - 1); @@ -506,13 +507,13 @@ static struct snd_soc_dai_ops omap_mcbsp_dai_ops = { .id = (link_id), \ .playback = { \ .channels_min = 1, \ - .channels_max = 2, \ + .channels_max = 4, \ .rates = OMAP_MCBSP_RATES, \ .formats = SNDRV_PCM_FMTBIT_S16_LE, \ }, \ .capture = { \ .channels_min = 1, \ - .channels_max = 2, \ + .channels_max = 4, \ .rates = OMAP_MCBSP_RATES, \ .formats = SNDRV_PCM_FMTBIT_S16_LE, \ }, \ -- cgit v0.10.2 From 8a1f936acdfd53cb0a981f3f80483863dcd84fa9 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 23 Apr 2009 14:36:49 +0300 Subject: ASoC: TWL4030: Add 4 channel TDM support Support for 4 channel TDM (SND_SOC_DAIFMT_DSP_A) for twl4030 codec. The channel allocations are: Playback: TDM i2s TWL RX Channel 1 Left SDRL2 Channel 3 Right SDRR2 Channel 2 -- SDRL1 Channel 4 -- SDRR1 Capture: TDM i2s TWL TX Channel 1 Left TXL1 Channel 3 Right TXR1 Channel 2 -- TXL2 Channel 4 -- TXR2 Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index fdf88df..e23c20c 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -1251,6 +1251,28 @@ static void twl4030_constraints(struct twl4030_priv *twl4030, twl4030->channels); } +/* In case of 4 channel mode, the RX1 L/R for playback and the TX2 L/R for + * capture has to be enabled/disabled. */ +static void twl4030_tdm_enable(struct snd_soc_codec *codec, int direction, + int enable) +{ + u8 reg, mask; + + reg = twl4030_read_reg_cache(codec, TWL4030_REG_OPTION); + + if (direction == SNDRV_PCM_STREAM_PLAYBACK) + mask = TWL4030_ARXL1_VRX_EN | TWL4030_ARXR1_EN; + else + mask = TWL4030_ATXL2_VTXL_EN | TWL4030_ATXR2_VTXR_EN; + + if (enable) + reg |= mask; + else + reg &= ~mask; + + twl4030_write(codec, TWL4030_REG_OPTION, reg); +} + static int twl4030_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { @@ -1267,6 +1289,15 @@ static int twl4030_startup(struct snd_pcm_substream *substream, if (twl4030->configured) twl4030_constraints(twl4030, twl4030->master_substream); } else { + if (!(twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) & + TWL4030_OPTION_1)) { + /* In option2 4 channel is not supported, set the + * constraint for the first stream for channels, the + * second stream will 'inherit' this cosntraint */ + snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_CHANNELS, + 2, 2); + } twl4030->master_substream = substream; } @@ -1292,6 +1323,10 @@ static void twl4030_shutdown(struct snd_pcm_substream *substream, twl4030->configured = 0; else if (!twl4030->master_substream->runtime->channels) twl4030->configured = 0; + + /* If the closing substream had 4 channel, do the necessary cleanup */ + if (substream->runtime->channels == 4) + twl4030_tdm_enable(codec, substream->stream, 0); } static int twl4030_hw_params(struct snd_pcm_substream *substream, @@ -1304,6 +1339,16 @@ static int twl4030_hw_params(struct snd_pcm_substream *substream, struct twl4030_priv *twl4030 = codec->private_data; u8 mode, old_mode, format, old_format; + /* If the substream has 4 channel, do the necessary setup */ + if (params_channels(params) == 4) { + /* Safety check: are we in the correct operating mode? */ + if ((twl4030_read_reg_cache(codec, TWL4030_REG_CODEC_MODE) & + TWL4030_OPTION_1)) + twl4030_tdm_enable(codec, substream->stream, 1); + else + return -EINVAL; + } + if (twl4030->configured) /* Ignoring hw_params for already configured DAI */ return 0; @@ -1461,6 +1506,9 @@ static int twl4030_set_dai_fmt(struct snd_soc_dai *codec_dai, case SND_SOC_DAIFMT_I2S: format |= TWL4030_AIF_FORMAT_CODEC; break; + case SND_SOC_DAIFMT_DSP_A: + format |= TWL4030_AIF_FORMAT_TDM; + break; default: return -EINVAL; } @@ -1642,13 +1690,13 @@ struct snd_soc_dai twl4030_dai[] = { .playback = { .stream_name = "Playback", .channels_min = 2, - .channels_max = 2, + .channels_max = 4, .rates = TWL4030_RATES | SNDRV_PCM_RATE_96000, .formats = TWL4030_FORMATS,}, .capture = { .stream_name = "Capture", .channels_min = 2, - .channels_max = 2, + .channels_max = 4, .rates = TWL4030_RATES, .formats = TWL4030_FORMATS,}, .ops = &twl4030_dai_ops, diff --git a/sound/soc/codecs/twl4030.h b/sound/soc/codecs/twl4030.h index 981ec60..3441115 100644 --- a/sound/soc/codecs/twl4030.h +++ b/sound/soc/codecs/twl4030.h @@ -116,6 +116,17 @@ #define TWL4030_OPTION_1 (1 << 0) #define TWL4030_OPTION_2 (0 << 0) +/* TWL4030_OPTION (0x02) Fields */ + +#define TWL4030_ATXL1_EN (1 << 0) +#define TWL4030_ATXR1_EN (1 << 1) +#define TWL4030_ATXL2_VTXL_EN (1 << 2) +#define TWL4030_ATXR2_VTXR_EN (1 << 3) +#define TWL4030_ARXL1_VRX_EN (1 << 4) +#define TWL4030_ARXR1_EN (1 << 5) +#define TWL4030_ARXL2_EN (1 << 6) +#define TWL4030_ARXR2_EN (1 << 7) + /* TWL4030_REG_MICBIAS_CTL (0x04) Fields */ #define TWL4030_MICBIAS2_CTL 0x40 -- cgit v0.10.2 From 9be24414aad047dcf9d8d2a9a929321536c7ebec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Mar 2009 10:25:24 -0400 Subject: tracing/wakeup: move access to wakeup_cpu into spinlock The code had the following outside the lock: if (next != wakeup_task) return; pc = preempt_count(); /* The task we are waiting for is waking up */ data = wakeup_trace->data[wakeup_cpu]; On initialization, wakeup_task is NULL and wakeup_cpu -1. This code is not under a lock. If wakeup_task is set on another CPU as that task is waking up, we can see the wakeup_task before wakeup_cpu is set. If we read wakeup_cpu while it is still -1 then we will have a bad data pointer. This patch moves the reading of wakeup_cpu within the protection of the spinlock used to protect the writing of wakeup_cpu and wakeup_task. [ Impact: remove possible race causing invalid pointer dereference ] Reported-by: Maneesh Soni Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index b8b13c5..eacb272 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -138,9 +138,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, pc = preempt_count(); - /* The task we are waiting for is waking up */ - data = wakeup_trace->data[wakeup_cpu]; - /* disable local data, not wakeup_cpu data */ cpu = raw_smp_processor_id(); disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled); @@ -154,6 +151,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; + /* The task we are waiting for is waking up */ + data = wakeup_trace->data[wakeup_cpu]; + trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); -- cgit v0.10.2 From 89ec0dee9eba6275d47be0b878cf5f6d5c2fb6eb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Mar 2009 11:03:29 -0400 Subject: tracing: increase size of number of possible events With the new event tracing registration, we must increase the number of events that can be registered. Currently the type field is only one byte, which leaves us only 256 possible events. Since we do not save the CPU number in the tracer anymore (it is determined by the per cpu ring buffer that is used) we have an extra byte to use. This patch increases the size of type from 1 byte (256 events) to 2 bytes (65,536 events). It also adds a WARN_ON_ONCE if we exceed that limit. [ Impact: allow more than 255 events ] Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2a4a407..07e0a6d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,13 +16,16 @@ struct dentry; * bash-15816 [01] 235.197585: idle_cpu <- irq_enter */ struct trace_entry { - int type; + unsigned short type; unsigned char flags; unsigned char preempt_count; int pid; int tgid; }; +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1) + /* * Trace iterator - used by printout routines who present trace * results to users and which routines might sleep, etc: diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5d6e879..9887131 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -398,7 +398,7 @@ static int trace_write_header(struct trace_seq *s) "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" "\n", - FIELD(int, type), + FIELD(unsigned short, type), FIELD(unsigned char, flags), FIELD(unsigned char, preempt_count), FIELD(int, pid), diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 83a8abb..06997e7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -537,6 +537,8 @@ int register_ftrace_event(struct trace_event *event) out: mutex_unlock(&trace_event_mutex); + WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT); + return ret; } EXPORT_SYMBOL_GPL(register_ftrace_event); -- cgit v0.10.2 From 75db37d2f4c0ad9466ead57d467277d097b4105c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 26 Mar 2009 11:43:36 -0400 Subject: tracing: add size checks for exported ftrace internal structures The events exported by TRACE_EVENT are automated and are guaranteed to be correct when used. The internal ftrace structures on the other hand are more manually exported. These require the ftrace maintainer to make sure they are up to date. This patch adds a size check to help flag when a type changes in an internal ftrace data structure, and the update needs to be reflected in the export. If a export is incorrect, then the only harm is that the user space tools will not know how to correctly read the internal structures of ftrace. [ Impact: help prevent inconsistent ftrace format print outs ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9887131..b920815 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -381,8 +381,11 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +extern char *__bad_type_size(void); + #undef FIELD #define FIELD(type, name) \ + sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ sizeof(field.name) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 48fc02f..0cb1a14 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -19,8 +19,12 @@ #undef TRACE_STRUCT #define TRACE_STRUCT(args...) args +extern void __bad_type_size(void); + #undef TRACE_FIELD #define TRACE_FIELD(type, item, assign) \ + if (sizeof(type) != sizeof(field.item)) \ + __bad_type_size(); \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ -- cgit v0.10.2 From d7285c6b5c54397fdf112c2fb98ee43193173aa9 Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Thu, 23 Apr 2009 10:21:38 -0700 Subject: x86: use native register access for native tlb flushing currently these are paravirtulaized, doesn't appear any callers rely on this (no pv_ops backends are using native_tlb and overriding cr3/4 access). [ Impact: fix lockdep warning with paravirt and function tracer ] Signed-off-by: Chris Wright LKML-Reference: <20090423172138.GR3036@sequoia.sous-sol.org> Signed-off-by: Steven Rostedt diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d3539f9..e2927c5 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -17,7 +17,7 @@ static inline void __native_flush_tlb(void) { - write_cr3(read_cr3()); + native_write_cr3(native_read_cr3()); } static inline void __native_flush_tlb_global(void) @@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - cr4 = read_cr4(); + cr4 = native_read_cr4(); /* clear PGE */ - write_cr4(cr4 & ~X86_CR4_PGE); + native_write_cr4(cr4 & ~X86_CR4_PGE); /* write old PGE again and flush TLBs */ - write_cr4(cr4); + native_write_cr4(cr4); raw_local_irq_restore(flags); } -- cgit v0.10.2 From c2518c4366f087ebc10b3919cb2461bbe4f42d0c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Apr 2009 23:26:18 -0400 Subject: tracing: fix cut and paste macro error In case a module uses the TRACE_EVENT macro for creating automated events in ftrace, it may choose to use a different file name than the defined system name, or choose to use a different path than the default "include/trace/events" include path. If this is done, then before including trace/define_trace.h the header would define either "TRACE_INCLUDE_FILE" for the file name or "TRACE_INCLUDE_PATH" for the include path. If it does not define these, then the define_trace.h defines them instead. If define trace defines them, then define_trace.h should also undefine them before exiting. To do this a macro is used to note this: #ifndef TRACE_INCLUDE_FILE # define TRACE_INCLUDE_FILE TRACE_SYSTEM # define UNDEF_TRACE_INCLUDE_FILE #endif [...] #ifdef UNDEF_TRACE_INCLUDE_FILE # undef TRACE_INCLUDE_FILE # undef UNDEF_TRACE_INCLUDE_FILE #endif The UNDEF_TRACE_INCLUDE_FILE acts as a CPP variable to know to undef the TRACE_INCLUDE_FILE before leaving define_trace.h. Unfortunately, due to cut and paste errors, the macros between FILE and PATH got mixed up. [ Impact: undef TRACE_INCLUDE_FILE and/or TRACE_INCLUDE_PATH when needed ] Signed-off-by: Steven Rostedt diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 7f1f23d..abc611f 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -44,7 +44,7 @@ #ifndef TRACE_INCLUDE_PATH # define __TRACE_INCLUDE(system) -# define UNDEF_TRACE_INCLUDE_FILE +# define UNDEF_TRACE_INCLUDE_PATH #else # define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) #endif @@ -64,13 +64,13 @@ /* Only undef what we defined in this file */ #ifdef UNDEF_TRACE_INCLUDE_FILE -# undef TRACE_INCLUDE_PATH +# undef TRACE_INCLUDE_FILE # undef UNDEF_TRACE_INCLUDE_FILE #endif -#ifdef UNDEF_TRACE_INCLUDE_FILE +#ifdef UNDEF_TRACE_INCLUDE_PATH # undef TRACE_INCLUDE_PATH -# undef UNDEF_TRACE_INCLUDE_FILE +# undef UNDEF_TRACE_INCLUDE_PATH #endif /* We may be processing more files */ -- cgit v0.10.2 From 334d4169a6592d3fcd863bbe822a8f6985ffa9af Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 24 Apr 2009 11:27:05 +0800 Subject: ring_buffer: compressed event header RB_MAX_SMALL_DATA = 28bytes is too small for most tracers, it wastes an 'u32' to save the actually length for events which data size > 28. This fix uses compressed event header and enlarges RB_MAX_SMALL_DATA. [ Impact: saves about 0%-12.5%(depends on tracer) memory in ring_buffer ] Signed-off-by: Lai Jiangshan LKML-Reference: <49F13189.3090000@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fac8f1a..1c2f809 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -11,7 +11,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - u32 type:2, len:3, time_delta:27; + u32 type_len:5, time_delta:27; u32 array[]; }; @@ -24,7 +24,8 @@ struct ring_buffer_event { * size is variable depending on how much * padding is needed * If time_delta is non zero: - * everything else same as RINGBUF_TYPE_DATA + * array[0] holds the actual length + * size = 4 + length (bytes) * * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta * array[0] = time delta (28 .. 59) @@ -35,22 +36,23 @@ struct ring_buffer_event { * array[1..2] = tv_sec * size = 16 bytes * - * @RINGBUF_TYPE_DATA: Data record - * If len is zero: + * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: + * Data record + * If type_len is zero: * array[0] holds the actual length * array[1..(length+3)/4] holds data - * size = 4 + 4 + length (bytes) + * size = 4 + length (bytes) * else - * length = len << 2 + * length = type_len << 2 * array[0..(length+3)/4-1] holds data * size = 4 + length (bytes) */ enum ring_buffer_type { + RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, RINGBUF_TYPE_PADDING, RINGBUF_TYPE_TIME_EXTEND, /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ RINGBUF_TYPE_TIME_STAMP, - RINGBUF_TYPE_DATA, }; unsigned ring_buffer_event_length(struct ring_buffer_event *event); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 61dbdf2..9692f10 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -28,8 +28,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s) { int ret; - ret = trace_seq_printf(s, "\ttype : 2 bits\n"); - ret = trace_seq_printf(s, "\tlen : 3 bits\n"); + ret = trace_seq_printf(s, "# compressed entry header\n"); + ret = trace_seq_printf(s, "\ttype_len : 5 bits\n"); ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n"); ret = trace_seq_printf(s, "\tarray : 32 bits\n"); ret = trace_seq_printf(s, "\n"); @@ -37,8 +37,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s) RINGBUF_TYPE_PADDING); ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", RINGBUF_TYPE_TIME_EXTEND); - ret = trace_seq_printf(s, "\tdata : type == %d\n", - RINGBUF_TYPE_DATA); + ret = trace_seq_printf(s, "\tdata max type_len == %d\n", + RINGBUF_TYPE_DATA_TYPE_LEN_MAX); return ret; } @@ -204,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on); #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U -#define RB_MAX_SMALL_DATA 28 +#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) + +/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ +#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX enum { RB_LEN_TIME_EXTEND = 8, @@ -213,17 +216,18 @@ enum { static inline int rb_null_event(struct ring_buffer_event *event) { - return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; + return event->type_len == RINGBUF_TYPE_PADDING + && event->time_delta == 0; } static inline int rb_discarded_event(struct ring_buffer_event *event) { - return event->type == RINGBUF_TYPE_PADDING && event->time_delta; + return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta; } static void rb_event_set_padding(struct ring_buffer_event *event) { - event->type = RINGBUF_TYPE_PADDING; + event->type_len = RINGBUF_TYPE_PADDING; event->time_delta = 0; } @@ -232,8 +236,8 @@ rb_event_data_length(struct ring_buffer_event *event) { unsigned length; - if (event->len) - length = event->len * RB_ALIGNMENT; + if (event->type_len) + length = event->type_len * RB_ALIGNMENT; else length = event->array[0]; return length + RB_EVNT_HDR_SIZE; @@ -243,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event) static unsigned rb_event_length(struct ring_buffer_event *event) { - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) /* undefined */ return -1; - return rb_event_data_length(event); + return event->array[0] + RB_EVNT_HDR_SIZE; case RINGBUF_TYPE_TIME_EXTEND: return RB_LEN_TIME_EXTEND; @@ -272,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event) unsigned ring_buffer_event_length(struct ring_buffer_event *event) { unsigned length = rb_event_length(event); - if (event->type != RINGBUF_TYPE_DATA) + if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) return length; length -= RB_EVNT_HDR_SIZE; if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) @@ -285,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); static void * rb_event_data(struct ring_buffer_event *event) { - BUG_ON(event->type != RINGBUF_TYPE_DATA); + BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); /* If length is in len field, then array[0] has the data */ - if (event->len) + if (event->type_len) return (void *)&event->array[0]; /* Otherwise length is in array[0] and array[1] has the data */ return (void *)&event->array[1]; @@ -988,7 +992,7 @@ static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer) if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) return; /* Only count data entries */ - if (event->type != RINGBUF_TYPE_DATA) + if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) continue; cpu_buffer->overrun++; cpu_buffer->entries--; @@ -1133,28 +1137,21 @@ static void rb_update_event(struct ring_buffer_event *event, unsigned type, unsigned length) { - event->type = type; + event->type_len = type; switch (type) { case RINGBUF_TYPE_PADDING: - break; - case RINGBUF_TYPE_TIME_EXTEND: - event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); - break; - case RINGBUF_TYPE_TIME_STAMP: - event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); break; - case RINGBUF_TYPE_DATA: + case 0: length -= RB_EVNT_HDR_SIZE; - if (length > RB_MAX_SMALL_DATA) { - event->len = 0; + if (length > RB_MAX_SMALL_DATA) event->array[0] = length; - } else - event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); + else + event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); break; default: BUG(); @@ -1562,7 +1559,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) if (length > BUF_PAGE_SIZE) goto out; - event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length); + event = rb_reserve_next_event(cpu_buffer, 0, length); if (!event) goto out; @@ -1634,7 +1631,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); static inline void rb_event_discard(struct ring_buffer_event *event) { - event->type = RINGBUF_TYPE_PADDING; + /* array[0] holds the actual length for the discarded event */ + event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; + event->type_len = RINGBUF_TYPE_PADDING; /* time delta must be non zero */ if (!event->time_delta) event->time_delta = 1; @@ -1786,8 +1785,7 @@ int ring_buffer_write(struct ring_buffer *buffer, goto out; event_length = rb_calculate_event_length(length); - event = rb_reserve_next_event(cpu_buffer, - RINGBUF_TYPE_DATA, event_length); + event = rb_reserve_next_event(cpu_buffer, 0, event_length); if (!event) goto out; @@ -2035,7 +2033,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, { u64 delta; - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; @@ -2066,7 +2064,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter, { u64 delta; - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: return; @@ -2181,7 +2179,8 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer) event = rb_reader_event(cpu_buffer); - if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) + if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX + || rb_discarded_event(event)) cpu_buffer->entries--; rb_update_read_stamp(cpu_buffer, event); @@ -2262,7 +2261,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) event = rb_reader_event(cpu_buffer); - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) RB_WARN_ON(cpu_buffer, 1); @@ -2334,7 +2333,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) event = rb_iter_head_event(iter); - switch (event->type) { + switch (event->type_len) { case RINGBUF_TYPE_PADDING: if (rb_null_event(event)) { rb_inc_iter(iter); @@ -2393,7 +2392,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts) event = rb_buffer_peek(buffer, cpu, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2421,7 +2420,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts) event = rb_iter_peek(iter, ts); spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2466,7 +2465,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) out: preempt_enable(); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2559,7 +2558,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts) out: spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); - if (event && event->type == RINGBUF_TYPE_PADDING) { + if (event && event->type_len == RINGBUF_TYPE_PADDING) { cpu_relax(); goto again; } @@ -2766,7 +2765,7 @@ static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) return; /* Only count data entries */ - if (event->type != RINGBUF_TYPE_DATA) + if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) continue; cpu_buffer->entries--; } -- cgit v0.10.2 From 0cfcdedaddf2468cb53e3cff9c3abfef14b4d784 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Thu, 23 Apr 2009 21:46:19 +0200 Subject: ALSA: sc6000: fix older card initialization The last patch to handle newer cards like SC7000 broke initialization of the SC6000. Fix this. Signed-off-by: Krzysztof Helt Signed-off-by: Takashi Iwai diff --git a/sound/isa/sc6000.c b/sound/isa/sc6000.c index 983ab7e..c803b2e 100644 --- a/sound/isa/sc6000.c +++ b/sound/isa/sc6000.c @@ -391,7 +391,6 @@ static int __devinit sc6000_init_board(char __iomem *vport, int config = mss_config | sc6000_mpu_irq_to_softcfg(mpu_irq[dev]); int err; - int cfg[2]; int old = 0; err = sc6000_dsp_reset(vport); @@ -421,11 +420,18 @@ static int __devinit sc6000_init_board(char __iomem *vport, answer, version[0], version[1]); /* set configuration */ - sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev], - mss_port[dev]); - if (sc6000_hw_cfg_write(vport, cfg) < 0) { - snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n"); - return -EIO; + sc6000_write(vport, COMMAND_5C); + if (sc6000_read(vport) < 0) + old = 1; + + if (!old) { + int cfg[2]; + sc6000_hw_cfg_encode(vport, &cfg[0], port[dev], mpu_port[dev], + mss_port[dev]); + if (sc6000_hw_cfg_write(vport, cfg) < 0) { + snd_printk(KERN_ERR "sc6000_hw_cfg_write: failed!\n"); + return -EIO; + } } err = sc6000_setup_board(vport, config); if (err < 0) { @@ -434,10 +440,6 @@ static int __devinit sc6000_init_board(char __iomem *vport, } sc6000_dsp_reset(vport); - sc6000_write(vport, COMMAND_5C); - if (sc6000_read(vport) < 0) - old = 1; - sc6000_dsp_reset(vport); if (!old) { sc6000_write(vport, COMMAND_60); -- cgit v0.10.2 From 3e98f9f15e916c48dfc5231d7e6a59be7f122764 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 24 Apr 2009 15:39:39 +0900 Subject: sh: pci: Fix up the build for CONFIG_PCI=n. Signed-off-by: Paul Mundt diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index f9101215..5b2e0fc 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -88,6 +88,7 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) #define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif +#ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, unsigned long *strategy_parameter) @@ -95,6 +96,7 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, *strat = PCI_DMA_BURST_INFINITY; *strategy_parameter = ~0UL; } +#endif #ifdef CONFIG_SUPERH32 /* -- cgit v0.10.2 From 782cc5ae6331d63b4febaa312c9d14493aafa9b8 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 24 Apr 2009 09:43:09 +0200 Subject: x86, ds: fix buffer alignment in debug store selftest The debug store selftest code uses a stack-allocated buffer, which is not necessarily correctly aligned. For tests using a buffer to hold a single entry, the buffer that is passed to ds_request must already be suitably aligned. Pass a suitably aligned portion of the bigger buffer. [ Impact: fix hw-branch-tracer self-test failure ] Signed-off-by: Markus Metzger Cc: markus.t.metzger@gmail.com LKML-Reference: <20090424094309.A30145@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index 5f104a0..6bc7c19 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -323,13 +323,15 @@ static int ds_selftest_bts_bad_request_task(void *buffer) int ds_selftest_bts(void) { struct ds_selftest_bts_conf conf; - unsigned char buffer[BUFFER_SIZE]; + unsigned char buffer[BUFFER_SIZE], *small_buffer; unsigned long irq; int cpu; printk(KERN_INFO "[ds] bts selftest..."); conf.error = 0; + small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; + get_online_cpus(); for_each_online_cpu(cpu) { conf.suspend = ds_suspend_bts_wrap; @@ -381,7 +383,7 @@ int ds_selftest_bts(void) conf.suspend = ds_suspend_bts_noirq; conf.resume = ds_resume_bts_noirq; conf.tracer = - ds_request_bts_task(current, buffer, SMALL_BUFFER_SIZE, + ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, NULL, (size_t)-1, BTS_KERNEL); local_irq_save(irq); ds_selftest_bts_cpu(&conf); -- cgit v0.10.2 From 7e0bfad24d85de7cf2202a7b0ce51de11a077b21 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 24 Apr 2009 09:44:48 +0200 Subject: x86, bts: reenable ptrace branch trace support The races found by Oleg Nesterov have been fixed. Reenable branch trace support. Signed-off-by: Markus Metzger Acked-by: Oleg Nesterov LKML-Reference: <20090424094448.A30216@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8130334..924e156 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -506,7 +506,6 @@ config X86_PTRACE_BTS bool "Branch Trace Store" default y depends on X86_DEBUGCTLMSR - depends on BROKEN ---help--- This adds a ptrace interface to the hardware's branch trace store. -- cgit v0.10.2 From 1cb81b143fa8f0e4629f10690862e2e52ca792ff Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 24 Apr 2009 09:51:43 +0200 Subject: x86, bts, mm: clean up buffer allocation The current mm interface is asymetric. One function allocates a locked buffer, another function only refunds the memory. Change this to have two functions for accounting and refunding locked memory, respectively; and do the actual buffer allocation in ptrace. [ Impact: refactor BTS buffer allocation code ] Signed-off-by: Markus Metzger Acked-by: Andrew Morton Cc: Peter Zijlstra LKML-Reference: <20090424095143.A30265@sedona.ch.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d5252ae..09ecbde 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -617,17 +617,28 @@ struct bts_context { struct work_struct work; }; -static inline void alloc_bts_buffer(struct bts_context *context, - unsigned int size) +static int alloc_bts_buffer(struct bts_context *context, unsigned int size) { - void *buffer; + void *buffer = NULL; + int err = -ENOMEM; - buffer = alloc_locked_buffer(size); - if (buffer) { - context->buffer = buffer; - context->size = size; - context->mm = get_task_mm(current); - } + err = account_locked_memory(current->mm, current->signal->rlim, size); + if (err < 0) + return err; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto out_refund; + + context->buffer = buffer; + context->size = size; + context->mm = get_task_mm(current); + + return 0; + + out_refund: + refund_locked_memory(current->mm, size); + return err; } static inline void free_bts_buffer(struct bts_context *context) @@ -638,7 +649,7 @@ static inline void free_bts_buffer(struct bts_context *context) kfree(context->buffer); context->buffer = NULL; - refund_locked_buffer_memory(context->mm, context->size); + refund_locked_memory(context->mm, context->size); context->size = 0; mmput(context->mm); @@ -786,13 +797,15 @@ static int ptrace_bts_config(struct task_struct *child, context->tracer = NULL; if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { + int err; + free_bts_buffer(context); if (!cfg.size) return 0; - alloc_bts_buffer(context, cfg.size); - if (!context->buffer) - return -ENOMEM; + err = alloc_bts_buffer(context, cfg.size); + if (err < 0) + return err; } if (cfg.flags & PTRACE_BTS_O_TRACE) diff --git a/include/linux/mm.h b/include/linux/mm.h index a3963ba..009eabd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -19,6 +19,7 @@ struct anon_vma; struct file_ra_state; struct user_struct; struct writeback_control; +struct rlimit; #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -1319,7 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); -extern void *alloc_locked_buffer(size_t size); -extern void refund_locked_buffer_memory(struct mm_struct *mm, size_t size); +extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size); +extern void refund_locked_memory(struct mm_struct *mm, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 28be15e..ac13043 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -629,38 +629,36 @@ void user_shm_unlock(size_t size, struct user_struct *user) free_uid(user); } -void *alloc_locked_buffer(size_t size) +int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, + size_t size) { - unsigned long rlim, vm, pgsz; - void *buffer = NULL; + unsigned long lim, vm, pgsz; + int error = -ENOMEM; pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - down_write(¤t->mm->mmap_sem); - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + pgsz; - if (rlim < vm) - goto out; + down_write(&mm->mmap_sem); - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + pgsz; - if (rlim < vm) + lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = mm->total_vm + pgsz; + if (lim < vm) goto out; - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) + lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = mm->locked_vm + pgsz; + if (lim < vm) goto out; - current->mm->total_vm += pgsz; - current->mm->locked_vm += pgsz; + mm->total_vm += pgsz; + mm->locked_vm += pgsz; + error = 0; out: - up_write(¤t->mm->mmap_sem); - return buffer; + up_write(&mm->mmap_sem); + return error; } -void refund_locked_buffer_memory(struct mm_struct *mm, size_t size) +void refund_locked_memory(struct mm_struct *mm, size_t size) { unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; -- cgit v0.10.2 From 39517091f88fae32b52254b561ced78da1eaf0a7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:05:52 -0400 Subject: tracing/lockdep: convert lockdep to use TRACE_EVENT macro The TRACE_FORMAT will soon be deprecated. This patch converts it to the TRACE_EVENT macro. Note, this change should also speed up the tracing. [ Impact: remove a user of deprecated TRACE_FORMAT ] Cc: Peter Zijlstra Signed-off-by: Steven Rostedt diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h index 3ca315c..0e956c9 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lockdep.h @@ -9,28 +9,64 @@ #ifdef CONFIG_LOCKDEP -TRACE_FORMAT(lock_acquire, +TRACE_EVENT(lock_acquire, + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *next_lock, unsigned long ip), + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", - read ? "read " : "", lock->name) - ); -TRACE_FORMAT(lock_release, + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) - ); + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); #ifdef CONFIG_LOCK_STAT -TRACE_FORMAT(lock_contended, +TRACE_EVENT(lock_contended, + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) - ); + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); TRACE_EVENT(lock_acquired, TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), -- cgit v0.10.2 From 160031b556e93590fa8635210d73d93c3d3853a9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:26:55 -0400 Subject: tracing/irq: convert irq traces to use TRACE_EVENT macro The TRACE_FORMAT will soon be deprecated. This patch converts it to the TRACE_EVENT macro. Note, this change should also speed up the tracing. [ Impact: remove a user of deprecated TRACE_FORMAT ] Cc: Jason Baron Signed-off-by: Steven Rostedt diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 75e3468..7686864 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -10,11 +10,24 @@ /* * Tracepoint for entry of interrupt handler: */ -TRACE_FORMAT(irq_handler_entry, +TRACE_EVENT(irq_handler_entry, + TP_PROTO(int irq, struct irqaction *action), + TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) - ); + + TP_STRUCT__entry( + __field( int, irq ) + __string( name, action->name ) + ), + + TP_fast_assign( + __entry->irq = irq; + __assign_str(name, action->name); + ), + + TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) +); /* * Tracepoint for return of an interrupt handler: @@ -39,17 +52,43 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -TRACE_FORMAT(softirq_entry, +TRACE_EVENT(softirq_entry, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); -TRACE_FORMAT(softirq_exit, + TP_STRUCT__entry( + __field( int, vec ) + __string( name, softirq_to_name[h-vec] ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + __assign_str(name, softirq_to_name[h-vec]); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) +); + +TRACE_EVENT(softirq_exit, + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + TP_ARGS(h, vec), - TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) - ); + + TP_STRUCT__entry( + __field( int, vec ) + __string( name, softirq_to_name[h-vec] ) + ), + + TP_fast_assign( + __entry->vec = (int)(h - vec); + __assign_str(name, softirq_to_name[h-vec]); + ), + + TP_printk("softirq=%d action=%s", __entry->vec, __get_str(name)) +); #endif /* _TRACE_IRQ_H */ -- cgit v0.10.2 From b8e65554d80b4c560d201362d0e8fa02109d89fd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 11:50:39 -0400 Subject: tracing: remove deprecated TRACE_FORMAT The TRACE_FORMAT macro has been deprecated by the TRACE_EVENT macro. There are no more users. All new users must use the TRACE_EVENT macro. [ Impact: remove old functionality ] Cc: Peter Zijlstra Signed-off-by: Steven Rostedt diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4353f3f..14df7e6 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -158,11 +158,6 @@ static inline void tracepoint_synchronize_unregister(void) #define PARAMS(args...) args -#ifndef TRACE_FORMAT -#define TRACE_FORMAT(name, proto, args, fmt) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif - #ifndef TRACE_EVENT /* * For use with the TRACE_EVENT macro: diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index abc611f..f7a7ae1 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,10 +26,6 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) -#undef TRACE_FORMAT -#define TRACE_FORMAT(name, proto, args, print) \ - DEFINE_TRACE(name) - #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a77f71a..1e68114 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,9 +18,6 @@ #include -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - #undef __array #define __array(type, item, len) type item[len]; @@ -62,9 +59,6 @@ * */ -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) - #undef __array #define __array(type, item, len) @@ -298,16 +292,6 @@ ftrace_define_fields_##call(void) \ * unregister_trace_(ftrace_event_); * } * - * For those macros defined with TRACE_FORMAT: - * - * static struct ftrace_event_call __used - * __attribute__((__aligned__(4))) - * __attribute__((section("_ftrace_events"))) event_ = { - * .name = "", - * .regfunc = ftrace_reg_event_, - * .unregfunc = ftrace_unreg_event_, - * } - * * * For those macros defined with TRACE_EVENT: * @@ -417,56 +401,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ #define _TRACE_PROFILE_INIT(call) #endif -#define _TRACE_FORMAT(call, proto, args, fmt) \ -static void ftrace_event_##call(proto) \ -{ \ - event_trace_printk(_RET_IP_, #call ": " fmt); \ -} \ - \ -static int ftrace_reg_event_##call(void) \ -{ \ - int ret; \ - \ - ret = register_trace_##call(ftrace_event_##call); \ - if (ret) \ - pr_info("event trace: Could not activate trace point " \ - "probe to " #call "\n"); \ - return ret; \ -} \ - \ -static void ftrace_unreg_event_##call(void) \ -{ \ - unregister_trace_##call(ftrace_event_##call); \ -} \ - \ -static struct ftrace_event_call event_##call; \ - \ -static int ftrace_init_event_##call(void) \ -{ \ - int id; \ - \ - id = register_ftrace_event(NULL); \ - if (!id) \ - return -ENODEV; \ - event_##call.id = id; \ - return 0; \ -} - -#undef TRACE_FORMAT -#define TRACE_FORMAT(call, proto, args, fmt) \ -_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ -static struct ftrace_event_call __used \ -__attribute__((__aligned__(4))) \ -__attribute__((section("_ftrace_events"))) event_##call = { \ - .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_init_event_##call, \ - .regfunc = ftrace_reg_event_##call, \ - .unregfunc = ftrace_unreg_event_##call, \ - _TRACE_PROFILE_INIT(call) \ -} - #undef __entry #define __entry entry -- cgit v0.10.2 From a8353a57299f965ca8747b1b062490aef2c9ca50 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 24 Apr 2009 11:03:21 +0300 Subject: ASoC: Beagle: Add support for 4 channel This patch adds support for the four channel TDM mode on Beagle board. Depending on the channel count, the interface needs to be configured differently (I2S for stereo DSP_A for four channels) Signed-off-by: Peter Ujfalusi Signed-off-by: Mark Brown diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c index 6aa428e..b0cff9f 100644 --- a/sound/soc/omap/omap3beagle.c +++ b/sound/soc/omap/omap3beagle.c @@ -41,23 +41,33 @@ static int omap3beagle_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *codec_dai = rtd->dai->codec_dai; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; + unsigned int fmt; int ret; + switch (params_channels(params)) { + case 2: /* Stereo I2S mode */ + fmt = SND_SOC_DAIFMT_I2S | + SND_SOC_DAIFMT_NB_NF | + SND_SOC_DAIFMT_CBM_CFM; + break; + case 4: /* Four channel TDM mode */ + fmt = SND_SOC_DAIFMT_DSP_A | + SND_SOC_DAIFMT_IB_NF | + SND_SOC_DAIFMT_CBM_CFM; + break; + default: + return -EINVAL; + } + /* Set codec DAI configuration */ - ret = snd_soc_dai_set_fmt(codec_dai, - SND_SOC_DAIFMT_I2S | - SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBM_CFM); + ret = snd_soc_dai_set_fmt(codec_dai, fmt); if (ret < 0) { printk(KERN_ERR "can't set codec DAI configuration\n"); return ret; } /* Set cpu DAI configuration */ - ret = snd_soc_dai_set_fmt(cpu_dai, - SND_SOC_DAIFMT_I2S | - SND_SOC_DAIFMT_NB_NF | - SND_SOC_DAIFMT_CBM_CFM); + ret = snd_soc_dai_set_fmt(cpu_dai, fmt); if (ret < 0) { printk(KERN_ERR "can't set cpu DAI configuration\n"); return ret; -- cgit v0.10.2 From 7629ad24f2b3df95c8b4cd8869e3c04e1df6c442 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 24 Apr 2009 16:37:44 +0200 Subject: ASoC: add SOC_DOUBLE_EXT macro Add a macro for double controls with special callback functions. Signed-off-by: Daniel Mack Signed-off-by: Mark Brown diff --git a/include/sound/soc.h b/include/sound/soc.h index b1f2f88..6ab80bf 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -118,6 +118,14 @@ .info = snd_soc_info_volsw, \ .get = xhandler_get, .put = xhandler_put, \ .private_value = SOC_SINGLE_VALUE(xreg, xshift, xmax, xinvert) } +#define SOC_DOUBLE_EXT(xname, xreg, shift_left, shift_right, xmax, xinvert,\ + xhandler_get, xhandler_put) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname),\ + .info = snd_soc_info_volsw, \ + .get = xhandler_get, .put = xhandler_put, \ + .private_value = (unsigned long)&(struct soc_mixer_control) \ + {.reg = xreg, .shift = shift_left, .rshift = shift_right, \ + .max = xmax, .invert = xinvert} } #define SOC_SINGLE_EXT_TLV(xname, xreg, xshift, xmax, xinvert,\ xhandler_get, xhandler_put, tlv_array) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ -- cgit v0.10.2 From 172fd9e26200668ebaf3e1d6d09b36d5d531bfa6 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Apr 2009 16:33:10 +0100 Subject: ASoC: Fix S3C64xx IIS device registration and support both ports The S3C64xx IIS code had a number of problems with device registration. The hardware has two IIS ports of which the driver supported only one at once via a single exported DAI, attempting to identify the DAI to use based on the dev->id of the ASoC platform device. As well as limiting the driver to only supporting one IIS port at once this also meant that the ID of the soc-audio device (or in future the card device) had to match the IIS ID. Fix both problems by converting the driver to register the DAIs based on probing of platform devices registered by the arch/arm code, using those platform devices to interact with the clock API. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c index 33c5de7..a84c4be 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.c +++ b/sound/soc/s3c24xx/s3c64xx-i2s.c @@ -120,36 +120,8 @@ EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clockrate); static int s3c64xx_i2s_probe(struct platform_device *pdev, struct snd_soc_dai *dai) { - struct device *dev = &pdev->dev; - struct s3c_i2sv2_info *i2s; - int ret; - - dev_dbg(dev, "%s: probing dai %d\n", __func__, pdev->id); - - if (pdev->id < 0 || pdev->id > ARRAY_SIZE(s3c64xx_i2s)) { - dev_err(dev, "id %d out of range\n", pdev->id); - return -EINVAL; - } - - i2s = &s3c64xx_i2s[pdev->id]; - - ret = s3c_i2sv2_probe(pdev, dai, i2s, - pdev->id ? S3C64XX_PA_IIS1 : S3C64XX_PA_IIS0); - if (ret) - return ret; - - i2s->dma_capture = &s3c64xx_i2s_pcm_stereo_in[pdev->id]; - i2s->dma_playback = &s3c64xx_i2s_pcm_stereo_out[pdev->id]; - - i2s->iis_cclk = clk_get(dev, "audio-bus"); - if (IS_ERR(i2s->iis_cclk)) { - dev_err(dev, "failed to get audio-bus"); - iounmap(i2s->regs); - return -ENODEV; - } - /* configure GPIO for i2s port */ - switch (pdev->id) { + switch (dai->id) { case 0: s3c_gpio_cfgpin(S3C64XX_GPD(0), S3C64XX_GPD0_I2S0_CLK); s3c_gpio_cfgpin(S3C64XX_GPD(1), S3C64XX_GPD1_I2S0_CDCLK); @@ -181,35 +153,114 @@ static struct snd_soc_dai_ops s3c64xx_i2s_dai_ops = { .set_sysclk = s3c64xx_i2s_set_sysclk, }; -struct snd_soc_dai s3c64xx_i2s_dai = { - .name = "s3c64xx-i2s", - .id = 0, - .probe = s3c64xx_i2s_probe, - .playback = { - .channels_min = 2, - .channels_max = 2, - .rates = S3C64XX_I2S_RATES, - .formats = S3C64XX_I2S_FMTS, +struct snd_soc_dai s3c64xx_i2s_dai[] = { + { + .name = "s3c64xx-i2s", + .id = 0, + .probe = s3c64xx_i2s_probe, + .playback = { + .channels_min = 2, + .channels_max = 2, + .rates = S3C64XX_I2S_RATES, + .formats = S3C64XX_I2S_FMTS, + }, + .capture = { + .channels_min = 2, + .channels_max = 2, + .rates = S3C64XX_I2S_RATES, + .formats = S3C64XX_I2S_FMTS, + }, + .ops = &s3c64xx_i2s_dai_ops, }, - .capture = { - .channels_min = 2, - .channels_max = 2, - .rates = S3C64XX_I2S_RATES, - .formats = S3C64XX_I2S_FMTS, + { + .name = "s3c64xx-i2s", + .id = 1, + .probe = s3c64xx_i2s_probe, + .playback = { + .channels_min = 2, + .channels_max = 2, + .rates = S3C64XX_I2S_RATES, + .formats = S3C64XX_I2S_FMTS, + }, + .capture = { + .channels_min = 2, + .channels_max = 2, + .rates = S3C64XX_I2S_RATES, + .formats = S3C64XX_I2S_FMTS, + }, + .ops = &s3c64xx_i2s_dai_ops, }, - .ops = &s3c64xx_i2s_dai_ops, }; EXPORT_SYMBOL_GPL(s3c64xx_i2s_dai); +static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev) +{ + struct s3c_i2sv2_info *i2s; + struct snd_soc_dai *dai; + int ret; + + if (pdev->id >= ARRAY_SIZE(s3c64xx_i2s)) { + dev_err(&pdev->dev, "id %d out of range\n", pdev->id); + return -EINVAL; + } + + i2s = &s3c64xx_i2s[pdev->id]; + dai = &s3c64xx_i2s_dai[pdev->id]; + dai->dev = &pdev->dev; + + i2s->dma_capture = &s3c64xx_i2s_pcm_stereo_in[pdev->id]; + i2s->dma_playback = &s3c64xx_i2s_pcm_stereo_out[pdev->id]; + + i2s->iis_cclk = clk_get(&pdev->dev, "audio-bus"); + if (IS_ERR(i2s->iis_cclk)) { + dev_err(&pdev->dev, "failed to get audio-bus"); + ret = PTR_ERR(i2s->iis_cclk); + goto err; + } + + ret = s3c_i2sv2_probe(pdev, dai, i2s, + dai->id ? S3C64XX_PA_IIS1 : S3C64XX_PA_IIS0); + if (ret) + goto err_clk; + + ret = snd_soc_register_dai(dai); + if (ret != 0) + goto err_i2sv2; + + return 0; + +err_i2sv2: + /* Not implemented for I2Sv2 core yet */ +err_clk: + clk_put(i2s->iis_cclk); +err: + return ret; +} + +static __devexit int s3c64xx_iis_dev_remove(struct platform_device *pdev) +{ + dev_err(&pdev->dev, "Device removal not yet supported\n"); + return 0; +} + +static struct platform_driver s3c64xx_iis_driver = { + .probe = s3c64xx_iis_dev_probe, + .remove = s3c64xx_iis_dev_remove, + .driver = { + .name = "s3c64xx-iis", + .owner = THIS_MODULE, + }, +}; + static int __init s3c64xx_i2s_init(void) { - return s3c_i2sv2_register_dai(&s3c64xx_i2s_dai); + return platform_driver_register(&s3c64xx_iis_driver); } module_init(s3c64xx_i2s_init); static void __exit s3c64xx_i2s_exit(void) { - snd_soc_unregister_dai(&s3c64xx_i2s_dai); + platform_driver_unregister(&s3c64xx_iis_driver); } module_exit(s3c64xx_i2s_exit); @@ -217,6 +268,3 @@ module_exit(s3c64xx_i2s_exit); MODULE_AUTHOR("Ben Dooks, "); MODULE_DESCRIPTION("S3C64XX I2S SoC Interface"); MODULE_LICENSE("GPL"); - - - diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.h b/sound/soc/s3c24xx/s3c64xx-i2s.h index b7ffe3c..597822a 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.h +++ b/sound/soc/s3c24xx/s3c64xx-i2s.h @@ -24,7 +24,7 @@ #define S3C64XX_CLKSRC_PCLK (0) #define S3C64XX_CLKSRC_MUX (1) -extern struct snd_soc_dai s3c64xx_i2s_dai; +extern struct snd_soc_dai s3c64xx_i2s_dai[]; extern unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *cpu_dai); -- cgit v0.10.2 From 008bec397cdabd22a6f4e4c16a746a86a046f8af Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Apr 2009 16:27:09 +0100 Subject: ASoC: S3C2412: Failing to get the I2S clock is an error Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c index b7e0b3f..168a088 100644 --- a/sound/soc/s3c24xx/s3c2412-i2s.c +++ b/sound/soc/s3c24xx/s3c2412-i2s.c @@ -120,7 +120,7 @@ static int s3c2412_i2s_probe(struct platform_device *pdev, s3c2412_i2s.iis_cclk = clk_get(&pdev->dev, "i2sclk"); if (s3c2412_i2s.iis_cclk == NULL) { - pr_debug("failed to get i2sclk clock\n"); + pr_err("failed to get i2sclk clock\n"); iounmap(s3c2412_i2s.regs); return -ENODEV; } -- cgit v0.10.2 From 060fa5c83e67901ba47ab484cfcdb32737d630ba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 12:20:52 -0400 Subject: tracing/events: reuse trace event ids after overflow With modules being able to add trace events, and the max trace event counter is 16 bits (65536) we can overflow the counter easily with a simple while loop adding and removing modules that contain trace events. This patch links together the registered trace events and on overflow searches for available trace event ids. It will still fail if over 65536 events are registered, but considering that a typical kernel only has 22000 functions, 65000 events should be sufficient. Reported-by: Li Zefan Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 07e0a6d..78a9ba2 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -56,6 +56,7 @@ typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, int flags); struct trace_event { struct hlist_node node; + struct list_head list; int type; trace_print_func trace; trace_print_func raw; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 06997e7..5fc51f0 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -483,6 +483,36 @@ struct trace_event *ftrace_find_event(int type) return NULL; } +static LIST_HEAD(ftrace_event_list); + +static int trace_search_list(struct list_head **list) +{ + struct trace_event *e; + int last = __TRACE_LAST_TYPE; + + if (list_empty(&ftrace_event_list)) { + *list = &ftrace_event_list; + return last + 1; + } + + /* + * We used up all possible max events, + * lets see if somebody freed one. + */ + list_for_each_entry(e, &ftrace_event_list, list) { + if (e->type != last + 1) + break; + last++; + } + + /* Did we used up all 65 thousand events??? */ + if ((last + 1) > FTRACE_MAX_EVENT) + return 0; + + *list = &e->list; + return last + 1; +} + /** * register_ftrace_event - register output for an event type * @event: the event type to register @@ -505,20 +535,40 @@ int register_ftrace_event(struct trace_event *event) mutex_lock(&trace_event_mutex); - if (!event) { - ret = next_event_type++; + if (WARN_ON(!event)) goto out; - } - if (!event->type) - event->type = next_event_type++; - else if (event->type > __TRACE_LAST_TYPE) { + INIT_LIST_HEAD(&event->list); + + if (!event->type) { + struct list_head *list; + + if (next_event_type > FTRACE_MAX_EVENT) { + + event->type = trace_search_list(&list); + if (!event->type) + goto out; + + } else { + + event->type = next_event_type++; + list = &ftrace_event_list; + } + + if (WARN_ON(ftrace_find_event(event->type))) + goto out; + + list_add_tail(&event->list, list); + + } else if (event->type > __TRACE_LAST_TYPE) { printk(KERN_WARNING "Need to add type to trace.h\n"); WARN_ON(1); - } - - if (ftrace_find_event(event->type)) goto out; + } else { + /* Is this event already used */ + if (ftrace_find_event(event->type)) + goto out; + } if (event->trace == NULL) event->trace = trace_nop_print; @@ -537,8 +587,6 @@ int register_ftrace_event(struct trace_event *event) out: mutex_unlock(&trace_event_mutex); - WARN_ON_ONCE(next_event_type > FTRACE_MAX_EVENT); - return ret; } EXPORT_SYMBOL_GPL(register_ftrace_event); @@ -551,6 +599,7 @@ int unregister_ftrace_event(struct trace_event *event) { mutex_lock(&trace_event_mutex); hlist_del(&event->node); + list_del(&event->list); mutex_unlock(&trace_event_mutex); return 0; -- cgit v0.10.2 From 79ffab34391933ee3b95dac7f25c0478fa2f8f1e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 May 2009 15:13:42 -0400 Subject: ext4: Properly initialize the buffer_head state These struct buffer_heads are allocated on the stack (and hence are initialized with stack garbage). They are only used to call a get_blocks() function, so that's mostly OK, but b_state must be initialized to be 0 so we don't have any unexpected BH_* flags set by accident, such as BH_Unwritten or BH_Delay. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e3a55eb..a953214 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3150,6 +3150,7 @@ retry: ret = PTR_ERR(handle); break; } + map_bh.b_state = 0; ret = ext4_get_blocks_wrap(handle, inode, block, max_blocks, &map_bh, EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a9ffd5..d7ad0bb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2055,7 +2055,20 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) if ((mpd->b_state & (1 << BH_Mapped)) && !(mpd->b_state & (1 << BH_Delay))) return 0; - new.b_state = mpd->b_state; + /* + * We need to make sure the BH_Delay flag is passed down to + * ext4_da_get_block_write(), since it calls + * ext4_get_blocks_wrap() with the EXT4_DELALLOC_RSVED flag. + * This flag causes ext4_get_blocks_wrap() to call + * ext4_da_update_reserve_space() if the passed buffer head + * has the BH_Delay flag set. In the future, once we clean up + * the interfaces to ext4_get_blocks_wrap(), we should pass in + * a separate flag which requests that the delayed allocation + * statistics should be updated, instead of depending on the + * state information getting passed down via the map_bh's + * state bitmasks plus the magic EXT4_DELALLOC_RSVED flag. + */ + new.b_state = mpd->b_state & (1 << BH_Delay); new.b_blocknr = 0; new.b_size = mpd->b_size; next = mpd->b_blocknr; diff --git a/fs/mpage.c b/fs/mpage.c index 680ba60..42381bd 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, struct buffer_head map_bh; unsigned long first_logical_block = 0; - clear_buffer_mapped(&map_bh); + map_bh.b_state = 0; + map_bh.b_size = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); @@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block) struct buffer_head map_bh; unsigned long first_logical_block = 0; - clear_buffer_mapped(&map_bh); + map_bh.b_state = 0; + map_bh.b_size = 0; bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, &map_bh, &first_logical_block, get_block); if (bio) -- cgit v0.10.2 From 8fb0e342481c4d80040670fec915f0b9c7c6499a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 12 May 2009 16:22:37 -0400 Subject: vfs: Add BUG_ON for delayed and unwritten flags in submit_bh() The BH_Delay and BH_Unwritten flags should never leak out to submit_bh(). So add some BUG_ON() checks to submit_bh so we can get a stack trace and determine how and why this might have happened. (Note that only XFS and ext4 use these buffer head flags, and XFS does not use submit_bh(). So this patch should only modify behavior for ext4.) Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" Cc: linux-fsdevel@vger.kernel.org diff --git a/fs/buffer.c b/fs/buffer.c index aed2977..ad01129 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2933,6 +2933,8 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(!buffer_locked(bh)); BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); + BUG_ON(buffer_delay(bh)); + BUG_ON(buffer_unwritten(bh)); /* * Mask in barrier bit for a write (could be either a WRITE or a -- cgit v0.10.2 From 29fa89d088941d79765d60f22d5ccdd6b8696e11 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 12 May 2009 16:30:27 -0400 Subject: ext4: Mark the unwritten buffer_head as mapped during write_begin Setting BH_Unwritten buffer_heads as BH_Mapped avoids multiple (unnecessary) calls to get_block() during the call to the write(2) system call. Setting BH_Unwritten buffer heads as BH_Mapped requires that the writepages() functions can handle BH_Unwritten buffer_heads. After this commit, things work as follows: ext4_ext_get_block() returns unmapped, unwritten, buffer head when called with create = 0 for prealloc space. This makes sure we handle the read path and non-delayed allocation case correctly. Even though the buffer head is marked unmapped we have valid b_blocknr and b_bdev values in the buffer_head. ext4_da_get_block_prep() called for block resrevation will now return mapped, unwritten, new buffer_head for prealloc space. This avoids multiple calls to get_block() for write to same offset. By making such buffers as BH_New, we also assure that sub-block zeroing of buffered writes happens correctly. Signed-off-by: Aneesh Kumar K.V Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index a953214..ea5c476 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2872,6 +2872,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (create == EXT4_CREATE_UNINITIALIZED_EXT) goto out; if (!create) { + if (allocated > max_blocks) + allocated = max_blocks; /* * We have blocks reserved already. We * return allocated blocks so that delalloc @@ -2879,8 +2881,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * the buffer head will be unmapped so that * a read from the block returns 0s. */ - if (allocated > max_blocks) - allocated = max_blocks; set_buffer_unwritten(bh_result); bh_result->b_bdev = inode->i_sb->s_bdev; bh_result->b_blocknr = newblock; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d7ad0bb..96f3366 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1852,7 +1852,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) * @logical - first logical block to start assignment with * * the function goes through all passed space and put actual disk - * block numbers into buffer heads, dropping BH_Delay + * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten */ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, struct buffer_head *exbh) @@ -1902,16 +1902,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, do { if (cur_logical >= logical + blocks) break; - if (buffer_delay(bh)) { - bh->b_blocknr = pblock; - clear_buffer_delay(bh); - bh->b_bdev = inode->i_sb->s_bdev; - } else if (buffer_unwritten(bh)) { - bh->b_blocknr = pblock; - clear_buffer_unwritten(bh); - set_buffer_mapped(bh); - set_buffer_new(bh); - bh->b_bdev = inode->i_sb->s_bdev; + + if (buffer_delay(bh) || + buffer_unwritten(bh)) { + + BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); + + if (buffer_delay(bh)) { + clear_buffer_delay(bh); + bh->b_blocknr = pblock; + } else { + /* + * unwritten already should have + * blocknr assigned. Verify that + */ + clear_buffer_unwritten(bh); + BUG_ON(bh->b_blocknr != pblock); + } + } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); @@ -2053,7 +2061,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * We consider only non-mapped and non-allocated blocks */ if ((mpd->b_state & (1 << BH_Mapped)) && - !(mpd->b_state & (1 << BH_Delay))) + !(mpd->b_state & (1 << BH_Delay)) && + !(mpd->b_state & (1 << BH_Unwritten))) return 0; /* * We need to make sure the BH_Delay flag is passed down to @@ -2205,6 +2214,17 @@ flush_it: return; } +static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) +{ + /* + * unmapped buffer is possible for holes. + * delay buffer is possible with delayed allocation. + * We also need to consider unwritten buffer as unmapped. + */ + return (!buffer_mapped(bh) || buffer_delay(bh) || + buffer_unwritten(bh)) && buffer_dirty(bh); +} + /* * __mpage_da_writepage - finds extent of pages and blocks * @@ -2289,8 +2309,7 @@ static int __mpage_da_writepage(struct page *page, * Otherwise we won't make progress * with the page in ext4_da_writepage */ - if (buffer_dirty(bh) && - (!buffer_mapped(bh) || buffer_delay(bh))) { + if (ext4_bh_unmapped_or_delay(NULL, bh)) { mpage_add_bh_to_extent(mpd, logical, bh->b_size, bh->b_state); @@ -2318,6 +2337,14 @@ static int __mpage_da_writepage(struct page *page, /* * this is a special callback for ->write_begin() only * it's intention is to return mapped block or reserve space + * + * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. + * We also have b_blocknr = -1 and b_bdev initialized properly + * + * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. + * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev + * initialized properly. + * */ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -2353,28 +2380,23 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, set_buffer_delay(bh_result); } else if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); - /* - * With sub-block writes into unwritten extents - * we also need to mark the buffer as new so that - * the unwritten parts of the buffer gets correctly zeroed. - */ - if (buffer_unwritten(bh_result)) + if (buffer_unwritten(bh_result)) { + /* A delayed write to unwritten bh should + * be marked new and mapped. Mapped ensures + * that we don't do get_block multiple times + * when we write to the same offset and new + * ensures that we do proper zero out for + * partial write. + */ set_buffer_new(bh_result); + set_buffer_mapped(bh_result); + } ret = 0; } return ret; } -static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) -{ - /* - * unmapped buffer is possible for holes. - * delay buffer is possible with delayed allocation - */ - return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); -} - static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -2828,7 +2850,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, for (i = 0; i < idx; i++) bh = bh->b_this_page; - if (!buffer_mapped(bh) || (buffer_delay(bh))) + if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) return 0; return 1; } -- cgit v0.10.2 From c5ca7c7636fa689a9746b6032f83aa7fffec31c6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 27 Apr 2009 22:48:48 -0400 Subject: ext4: Fallback to vmalloc if kmalloc can't allocate s_flex_groups array For very large filesystems, the s_flex_groups array can get quite big. For example, a filesystem that can be resized up to 16TB will have 8192 flex groups (assuming the default flex_bg size of 16), so the array is 96k, which is *very* marginal for kmalloc(). On the other hand, a 160GB filesystem without the resize_inode feature will only require 960 bytes. So we try to allocate the array first using kmalloc(), and if that fails, we'll try to use vmalloc() instead. Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2958f4e..3f4475d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -586,7 +587,10 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); - kfree(sbi->s_flex_groups); + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -1620,6 +1624,7 @@ static int ext4_fill_flex_info(struct super_block *sb) ext4_group_t flex_group_count; ext4_group_t flex_group; int groups_per_flex = 0; + size_t size; int i; if (!sbi->s_es->s_log_groups_per_flex) { @@ -1634,8 +1639,13 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; - sbi->s_flex_groups = kzalloc(flex_group_count * - sizeof(struct flex_groups), GFP_KERNEL); + size = flex_group_count * sizeof(struct flex_groups); + sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); + if (sbi->s_flex_groups == NULL) { + sbi->s_flex_groups = vmalloc(size); + if (sbi->s_flex_groups) + memset(sbi->s_flex_groups, 0, size); + } if (sbi->s_flex_groups == NULL) { printk(KERN_ERR "EXT4-fs: not enough memory for " "%u flex groups\n", flex_group_count); @@ -2842,6 +2852,12 @@ failed_mount4: sbi->s_journal = NULL; } failed_mount3: + if (sbi->s_flex_groups) { + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); + } percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); -- cgit v0.10.2 From f7c439504ccba0cca43271e651013ab97a221c62 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 24 Apr 2009 23:31:59 -0400 Subject: ext4: Use is_power_of_2() for clarity Signed-off-by: Robert P. J. Day Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3f4475d..3e509bc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1483,7 +1483,7 @@ set_qf_format: return 0; if (option < 0 || option > (1 << 30)) return 0; - if (option & (option - 1)) { + if (!is_power_of_2(option)) { printk(KERN_ERR "EXT4-fs: inode_readahead_blks" " must be a power of 2\n"); return 0; @@ -2101,8 +2101,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, if (parse_strtoul(buf, 0x40000000, &t)) return -EINVAL; - /* inode_readahead_blks must be a power of 2 */ - if (t & (t-1)) + if (!is_power_of_2(t)) return -EINVAL; sbi->s_inode_readahead_blks = t; -- cgit v0.10.2 From e2d670523c6c4ccb0fca9f3ab1b8f066d9aa57d6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 May 2009 00:33:44 -0400 Subject: ext4: Simplify ext4_commit_super()'s function signature The ext4_commit_super() function took both a struct super_block * and a struct ext4_super_block *, but the struct ext4_super_block can be derived from the struct super_block. Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3e509bc..ad4c9be 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -54,8 +54,7 @@ static struct kset *ext4_kset; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); -static int ext4_commit_super(struct super_block *sb, - struct ext4_super_block *es, int sync); +static int ext4_commit_super(struct super_block *sb, int sync); static void ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); static void ext4_clear_journal_err(struct super_block *sb, @@ -306,7 +305,7 @@ static void ext4_handle_error(struct super_block *sb) printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; } - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); if (test_opt(sb, ERRORS_PANIC)) panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); @@ -448,7 +447,7 @@ __acquires(bitlock) if (test_opt(sb, ERRORS_CONT)) { EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 0); + ext4_commit_super(sb, 0); return; } ext4_unlock_group(sb, grp); @@ -577,7 +576,7 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } if (sbi->s_proc) { remove_proc_entry(sb->s_id, ext4_proc_root); @@ -1596,7 +1595,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (sbi->s_journal) EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " "bpg=%lu, ipg=%lu, mo=%04lx]\n", @@ -2655,7 +2654,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (test_opt(sb, ERRORS_PANIC)) { EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); goto failed_mount4; } } @@ -3132,15 +3131,15 @@ static int ext4_load_journal(struct super_block *sb, sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } return 0; } -static int ext4_commit_super(struct super_block *sb, - struct ext4_super_block *es, int sync) +static int ext4_commit_super(struct super_block *sb, int sync) { + struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; @@ -3212,7 +3211,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, sb->s_flags & MS_RDONLY) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); sb->s_dirt = 0; - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } unlock_super(sb); @@ -3253,7 +3252,7 @@ static void ext4_clear_journal_err(struct super_block *sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); jbd2_journal_clear_err(journal); } @@ -3293,7 +3292,7 @@ static void ext4_write_super(struct super_block *sb) BUG(); sb->s_dirt = 0; } else { - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + ext4_commit_super(sb, 1); } } @@ -3312,7 +3311,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) target); } } else { - ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); + ext4_commit_super(sb, wait); } return ret; } @@ -3345,7 +3344,7 @@ static int ext4_freeze(struct super_block *sb) /* Journal blocked and flushed, clear needs_recovery flag. */ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + error = ext4_commit_super(sb, 1); if (error) goto out; } @@ -3365,7 +3364,7 @@ static int ext4_unfreeze(struct super_block *sb) lock_super(sb); /* Reser the needs_recovery flag before the fs is unlocked. */ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); + ext4_commit_super(sb, 1); unlock_super(sb); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } @@ -3520,7 +3519,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } } if (sbi->s_journal == NULL) - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA /* Release old quota file names */ -- cgit v0.10.2 From 7234ab2a55e77784b44cf2d862136d9e41b8d98a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 30 Apr 2009 21:24:04 -0400 Subject: ext4: Fix and simplify s_dirt handling The s_dirt flag wasn't completely handled correctly, but it didn't really matter when journalling was enabled. It turns out that when ext4 runs without a journal, we don't clear s_dirt in places where we should have, with the result that the high-level write_super() function was writing the superblock when it wasn't necessary. So we fix this by making ext4_commit_super() clear the s_dirt flag, and removing many of the other places where s_dirt is manipulated. When journalling is enabled, the s_dirt flag might be left set more often, but s_dirt really doesn't matter when journalling is enabled. Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ad4c9be..7c7a08a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3128,7 +3128,6 @@ static int ext4_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { es->s_journal_dev = cpu_to_le32(journal_devnum); - sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ ext4_commit_super(sb, 1); @@ -3168,7 +3167,7 @@ static int ext4_commit_super(struct super_block *sb, int sync) &EXT4_SB(sb)->s_freeblocks_counter)); es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); - + sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); if (sync) { @@ -3210,7 +3209,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb, if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - sb->s_dirt = 0; ext4_commit_super(sb, 1); } unlock_super(sb); @@ -3271,10 +3269,8 @@ int ext4_force_commit(struct super_block *sb) return 0; journal = EXT4_SB(sb)->s_journal; - if (journal) { - sb->s_dirt = 0; + if (journal) ret = ext4_journal_force_commit(journal); - } return ret; } @@ -3282,15 +3278,13 @@ int ext4_force_commit(struct super_block *sb) /* * Ext4 always journals updates to the superblock itself, so we don't * have to propagate any other updates to the superblock on disk at this - * point. (We can probably nuke this function altogether, and remove - * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) + * point if the journalling is enabled. */ static void ext4_write_super(struct super_block *sb) { if (EXT4_SB(sb)->s_journal) { if (mutex_trylock(&sb->s_lock) != 0) BUG(); - sb->s_dirt = 0; } else { ext4_commit_super(sb, 1); } @@ -3302,7 +3296,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait) tid_t target; trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); - sb->s_dirt = 0; if (EXT4_SB(sb)->s_journal) { if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { @@ -3324,7 +3317,6 @@ static int ext4_freeze(struct super_block *sb) { int error = 0; journal_t *journal; - sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { journal = EXT4_SB(sb)->s_journal; -- cgit v0.10.2 From 9ca92389c5312a51e819c15c762f0abdc7f3129b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 May 2009 12:52:25 -0400 Subject: ext4: Use separate super_operations structure for no_journal filesystems By using a separate super_operations structure for filesystems that have and don't have journals, we can simply ext4_write_super() --- which is only needed when no journal is present --- and ext4_freeze(), ext4_unfreeze(), and ext4_sync_fs(), which are only needed when the journal is present. Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7c7a08a..68c3a44 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -995,7 +995,6 @@ static const struct super_operations ext4_sops = { .dirty_inode = ext4_dirty_inode, .delete_inode = ext4_delete_inode, .put_super = ext4_put_super, - .write_super = ext4_write_super, .sync_fs = ext4_sync_fs, .freeze_fs = ext4_freeze, .unfreeze_fs = ext4_unfreeze, @@ -1010,6 +1009,25 @@ static const struct super_operations ext4_sops = { .bdev_try_to_free_page = bdev_try_to_free_page, }; +static const struct super_operations ext4_nojournal_sops = { + .alloc_inode = ext4_alloc_inode, + .destroy_inode = ext4_destroy_inode, + .write_inode = ext4_write_inode, + .dirty_inode = ext4_dirty_inode, + .delete_inode = ext4_delete_inode, + .write_super = ext4_write_super, + .put_super = ext4_put_super, + .statfs = ext4_statfs, + .remount_fs = ext4_remount, + .clear_inode = ext4_clear_inode, + .show_options = ext4_show_options, +#ifdef CONFIG_QUOTA + .quota_read = ext4_quota_read, + .quota_write = ext4_quota_write, +#endif + .bdev_try_to_free_page = bdev_try_to_free_page, +}; + static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, @@ -2615,7 +2633,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* * set up enough so that it can read an inode */ - sb->s_op = &ext4_sops; + if (!test_opt(sb, NOLOAD) && + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + sb->s_op = &ext4_sops; + else + sb->s_op = &ext4_nojournal_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA @@ -3275,19 +3297,9 @@ int ext4_force_commit(struct super_block *sb) return ret; } -/* - * Ext4 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this - * point if the journalling is enabled. - */ static void ext4_write_super(struct super_block *sb) { - if (EXT4_SB(sb)->s_journal) { - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - } else { - ext4_commit_super(sb, 1); - } + ext4_commit_super(sb, 1); } static int ext4_sync_fs(struct super_block *sb, int wait) @@ -3296,15 +3308,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait) tid_t target; trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); - if (EXT4_SB(sb)->s_journal) { - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, - &target)) { - if (wait) - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, - target); - } - } else { - ext4_commit_super(sb, wait); + if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { + if (wait) + jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); } return ret; } @@ -3318,32 +3324,31 @@ static int ext4_freeze(struct super_block *sb) int error = 0; journal_t *journal; - if (!(sb->s_flags & MS_RDONLY)) { - journal = EXT4_SB(sb)->s_journal; + if (sb->s_flags & MS_RDONLY) + return 0; - if (journal) { - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); + journal = EXT4_SB(sb)->s_journal; - /* - * We don't want to clear needs_recovery flag when we - * failed to flush the journal. - */ - error = jbd2_journal_flush(journal); - if (error < 0) - goto out; - } + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); - /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - error = ext4_commit_super(sb, 1); - if (error) - goto out; + /* + * Don't clear the needs_recovery flag if we failed to flush + * the journal. + */ + error = jbd2_journal_flush(journal); + if (error < 0) { + out: + jbd2_journal_unlock_updates(journal); + return error; } + + /* Journal blocked and flushed, clear needs_recovery flag. */ + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + error = ext4_commit_super(sb, 1); + if (error) + goto out; return 0; -out: - jbd2_journal_unlock_updates(journal); - return error; } /* @@ -3352,14 +3357,15 @@ out: */ static int ext4_unfreeze(struct super_block *sb) { - if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { - lock_super(sb); - /* Reser the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, 1); - unlock_super(sb); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - } + if (sb->s_flags & MS_RDONLY) + return 0; + + lock_super(sb); + /* Reset the needs_recovery flag before the fs is unlocked. */ + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_commit_super(sb, 1); + unlock_super(sb); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return 0; } -- cgit v0.10.2 From 8df9675f8b498d0bfa1f0b5b06f56bf1ff366dd5 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 May 2009 08:50:38 -0400 Subject: ext4: Avoid races caused by on-line resizing and SMP memory reordering Ext4's on-line resizing adds a new block group and then, only at the last step adjusts s_groups_count. However, it's possible on SMP systems that another CPU could see the updated the s_group_count and not see the newly initialized data structures for the just-added block group. For this reason, it's important to insert a SMP read barrier after reading s_groups_count and before reading any (for example) the new block group descriptors allowed by the increased value of s_groups_count. Unfortunately, we rather blatently violate this locking protocol documented in fs/ext4/resize.c. Fortunately, (1) on-line resizes happen relatively rarely, and (2) it seems rare that the filesystem code will immediately try to use just-added block group before any memory ordering issues resolve themselves. So apparently problems here are relatively hard to hit, since ext3 has been vulnerable to the same issue for years with no one apparently complaining. Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 53c72ad..a5ba039 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -88,6 +88,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) { int bit, bit_max; + ext4_group_t ngroups = ext4_get_groups_count(sb); unsigned free_blocks, group_blocks; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -123,7 +124,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, bit_max += ext4_bg_num_gdb(sb, block_group); } - if (block_group == sbi->s_groups_count - 1) { + if (block_group == ngroups - 1) { /* * Even though mke2fs always initialize first and last group * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need @@ -131,7 +132,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ group_blocks = ext4_blocks_count(sbi->s_es) - le32_to_cpu(sbi->s_es->s_first_data_block) - - (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); + (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); } else { group_blocks = EXT4_BLOCKS_PER_GROUP(sb); } @@ -205,18 +206,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, { unsigned int group_desc; unsigned int offset; + ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (block_group >= sbi->s_groups_count) { + if (block_group >= ngroups) { ext4_error(sb, "ext4_get_group_desc", "block_group >= groups_count - " "block_group = %u, groups_count = %u", - block_group, sbi->s_groups_count); + block_group, ngroups); return NULL; } - smp_rmb(); group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); @@ -665,7 +666,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) ext4_fsblk_t desc_count; struct ext4_group_desc *gdp; ext4_group_t i; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); #ifdef EXT4FS_DEBUG struct ext4_super_block *es; ext4_fsblk_t bitmap_count; @@ -677,7 +678,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) bitmap_count = 0; gdp = NULL; - smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) @@ -700,7 +700,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) return bitmap_count; #else desc_count = 0; - smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d0f15ef..02ec44b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1228,6 +1228,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, return grp_info[indexv][indexh]; } +/* + * Reading s_groups_count requires using smp_rmb() afterwards. See + * the locking protocol documented in the comments of ext4_group_add() + * in resize.c + */ +static inline ext4_group_t ext4_get_groups_count(struct super_block *sb) +{ + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + + smp_rmb(); + return ngroups; +} static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, ext4_group_t block_group) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f18e0a0..55ba419 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -316,7 +316,7 @@ error_return: static int find_group_dir(struct super_block *sb, struct inode *parent, ext4_group_t *best_group) { - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); unsigned int freei, avefreei; struct ext4_group_desc *desc, *best_desc = NULL; ext4_group_t group; @@ -353,7 +353,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, struct flex_groups *flex_group = sbi->s_flex_groups; ext4_group_t parent_group = EXT4_I(parent)->i_block_group; ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); - ext4_group_t ngroups = sbi->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); int flex_size = ext4_flex_bg_size(sbi); ext4_group_t best_flex = parent_fbg_group; int blocks_per_flex = sbi->s_blocks_per_group * flex_size; @@ -362,7 +362,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, ext4_group_t n_fbg_groups; ext4_group_t i; - n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> + n_fbg_groups = (ngroups + flex_size - 1) >> sbi->s_log_groups_per_flex; find_close_to_parent: @@ -478,20 +478,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_group_t ngroups = sbi->s_groups_count; + ext4_group_t real_ngroups = ext4_get_groups_count(sb); int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext4_fsblk_t freeb, avefreeb; unsigned int ndirs; int max_dirs, min_inodes; ext4_grpblk_t min_blocks; - ext4_group_t i, grp, g; + ext4_group_t i, grp, g, ngroups; struct ext4_group_desc *desc; struct orlov_stats stats; int flex_size = ext4_flex_bg_size(sbi); + ngroups = real_ngroups; if (flex_size > 1) { - ngroups = (ngroups + flex_size - 1) >> + ngroups = (real_ngroups + flex_size - 1) >> sbi->s_log_groups_per_flex; parent_group >>= sbi->s_log_groups_per_flex; } @@ -543,7 +544,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, */ grp *= flex_size; for (i = 0; i < flex_size; i++) { - if (grp+i >= sbi->s_groups_count) + if (grp+i >= real_ngroups) break; desc = ext4_get_group_desc(sb, grp+i, NULL); if (desc && ext4_free_inodes_count(sb, desc)) { @@ -583,7 +584,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, } fallback: - ngroups = sbi->s_groups_count; + ngroups = real_ngroups; avefreei = freei / ngroups; fallback_retry: parent_group = EXT4_I(parent)->i_block_group; @@ -613,9 +614,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, ext4_group_t *group, int mode) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; - ext4_group_t i, last; int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); /* @@ -799,11 +799,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct super_block *sb; struct buffer_head *inode_bitmap_bh = NULL; struct buffer_head *group_desc_bh; - ext4_group_t group = 0; + ext4_group_t ngroups, group = 0; unsigned long ino = 0; struct inode *inode; struct ext4_group_desc *gdp = NULL; - struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; int ret2, err = 0; @@ -818,15 +817,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) return ERR_PTR(-EPERM); sb = dir->i_sb; + ngroups = ext4_get_groups_count(sb); trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, dir->i_ino, mode); inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); ei = EXT4_I(inode); - sbi = EXT4_SB(sb); - es = sbi->s_es; if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { ret2 = find_group_flex(sb, dir, &group); @@ -856,7 +854,7 @@ got_group: if (ret2 == -1) goto out; - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { err = -EIO; gdp = ext4_get_group_desc(sb, group, &group_desc_bh); @@ -917,7 +915,7 @@ repeat_in_this_group: * group descriptor metadata has not yet been updated. * So we just go onto the next blockgroup. */ - if (++group == sbi->s_groups_count) + if (++group == ngroups) group = 0; } err = -ENOSPC; @@ -1158,7 +1156,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) { unsigned long desc_count; struct ext4_group_desc *gdp; - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); #ifdef EXT4FS_DEBUG struct ext4_super_block *es; unsigned long bitmap_count, x; @@ -1168,7 +1166,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1190,7 +1188,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) return desc_count; #else desc_count = 0; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1205,9 +1203,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) unsigned long ext4_count_dirs(struct super_block * sb) { unsigned long count = 0; - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 96f3366..4e7f363 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4965,7 +4965,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) */ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) { - int groups, gdpblocks; + ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); + int gdpblocks; int idxblocks; int ret = 0; @@ -4992,8 +4993,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) groups += nrblocks; gdpblocks = groups; - if (groups > EXT4_SB(inode->i_sb)->s_groups_count) - groups = EXT4_SB(inode->i_sb)->s_groups_count; + if (groups > ngroups) + groups = ngroups; if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f871677..c3af9e6 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -739,6 +739,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, static int ext4_mb_init_cache(struct page *page, char *incore) { + ext4_group_t ngroups; int blocksize; int blocks_per_page; int groups_per_page; @@ -757,6 +758,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) inode = page->mapping->host; sb = inode->i_sb; + ngroups = ext4_get_groups_count(sb); blocksize = 1 << inode->i_blkbits; blocks_per_page = PAGE_CACHE_SIZE / blocksize; @@ -780,7 +782,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) for (i = 0; i < groups_per_page; i++) { struct ext4_group_desc *desc; - if (first_group + i >= EXT4_SB(sb)->s_groups_count) + if (first_group + i >= ngroups) break; err = -EIO; @@ -852,7 +854,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) struct ext4_group_info *grinfo; group = (first_block + i) >> 1; - if (group >= EXT4_SB(sb)->s_groups_count) + if (group >= ngroups) break; /* @@ -1788,6 +1790,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) int block, pnum; int blocks_per_page; int groups_per_page; + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t first_group; struct ext4_group_info *grp; @@ -1807,7 +1810,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) /* read all groups the page covers into the cache */ for (i = 0; i < groups_per_page; i++) { - if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) + if ((first_group + i) >= ngroups) break; grp = ext4_get_group_info(sb, first_group + i); /* take all groups write allocation @@ -1945,8 +1948,7 @@ err: static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { - ext4_group_t group; - ext4_group_t i; + ext4_group_t ngroups, group, i; int cr; int err = 0; int bsbits; @@ -1957,6 +1959,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) sb = ac->ac_sb; sbi = EXT4_SB(sb); + ngroups = ext4_get_groups_count(sb); BUG_ON(ac->ac_status == AC_STATUS_FOUND); /* first, try the goal */ @@ -2017,11 +2020,11 @@ repeat: */ group = ac->ac_g_ex.fe_group; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { + for (i = 0; i < ngroups; group++, i++) { struct ext4_group_info *grp; struct ext4_group_desc *desc; - if (group == EXT4_SB(sb)->s_groups_count) + if (group == ngroups) group = 0; /* quick check to skip empty groups */ @@ -2315,12 +2318,10 @@ static struct file_operations ext4_mb_seq_history_fops = { static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; - if (*pos < 0 || *pos >= sbi->s_groups_count) + if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; - group = *pos + 1; return (void *) ((unsigned long) group); } @@ -2328,11 +2329,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ++*pos; - if (*pos < 0 || *pos >= sbi->s_groups_count) + if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; group = *pos + 1; return (void *) ((unsigned long) group); @@ -2587,6 +2587,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) static int ext4_mb_init_backend(struct super_block *sb) { + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int metalen; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -2598,7 +2599,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_group_desc *desc; /* This is the number of blocks used by GDT */ - num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - + num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); /* @@ -2644,7 +2645,7 @@ static int ext4_mb_init_backend(struct super_block *sb) for (i = 0; i < num_meta_group_infos; i++) { if ((i + 1) == num_meta_group_infos) metalen = sizeof(*meta_group_info) * - (sbi->s_groups_count - + (ngroups - (i << EXT4_DESC_PER_BLOCK_BITS(sb))); meta_group_info = kmalloc(metalen, GFP_KERNEL); if (meta_group_info == NULL) { @@ -2655,7 +2656,7 @@ static int ext4_mb_init_backend(struct super_block *sb) sbi->s_group_info[i] = meta_group_info; } - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { printk(KERN_ERR @@ -2781,13 +2782,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) int ext4_mb_release(struct super_block *sb) { + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; struct ext4_group_info *grinfo; struct ext4_sb_info *sbi = EXT4_SB(sb); if (sbi->s_group_info) { - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); #ifdef DOUBLE_CHECK kfree(grinfo->bb_bitmap); @@ -2797,7 +2799,7 @@ int ext4_mb_release(struct super_block *sb) ext4_unlock_group(sb, i); kfree(grinfo); } - num_meta_group_infos = (sbi->s_groups_count + + num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); for (i = 0; i < num_meta_group_infos; i++) @@ -4121,7 +4123,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, static void ext4_mb_show_ac(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; - ext4_group_t i; + ext4_group_t ngroups, i; printk(KERN_ERR "EXT4-fs: Can't allocate:" " Allocation context details:\n"); @@ -4145,7 +4147,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, ac->ac_found); printk(KERN_ERR "EXT4-fs: groups: \n"); - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + ngroups = ext4_get_groups_count(sb); + for (i = 0; i < ngroups; i++) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); struct ext4_prealloc_space *pa; ext4_grpblk_t start; @@ -4469,13 +4472,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; int freed = 0; trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", sb->s_id, needed); - for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { + for (i = 0; i < ngroups && needed > 0; i++) { ret = ext4_mb_discard_group_preallocations(sb, i, needed); freed += ret; needed -= ret; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 68c3a44..fcd7b24 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3557,9 +3557,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) if (test_opt(sb, MINIX_DF)) { sbi->s_overhead_last = 0; } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { - ext4_group_t ngroups = sbi->s_groups_count, i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); ext4_fsblk_t overhead = 0; - smp_rmb(); /* * Compute the overhead (FS structures). This is constant -- cgit v0.10.2 From 114e9fc90703bd6aac0229fb559e97caa6c49770 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 25 Apr 2009 15:48:07 -0400 Subject: ext4: Remove outdated comment about lock_super() ext4_fill_super() is no longer called by read_super(), and it is no longer called with the superblock locked. The unlock_super()/lock_super() is no longer present, so this comment is entirely superfluous. Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index fcd7b24..e3b35f2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2828,14 +2828,6 @@ no_journal: goto failed_mount4; }; - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock - * in numerous places. Here we just pop the lock - it's relatively - * harmless, because we are now ready to accept write_super() requests, - * and aviro says that's the only reason for hanging onto the - * superblock lock. - */ EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; -- cgit v0.10.2 From a63c9eb2ce6f5028da90f282798232c4f398ceb8 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 1 May 2009 01:59:42 -0400 Subject: ext4: ext4_mark_recovery_complete() doesn't need to use lock_super The function ext4_mark_recovery_complete() is called from two call paths: either (a) while mounting the filesystem, in which case there's no danger of any other CPU calling write_super() until the mount is completed, and (b) while remounting the filesystem read-write, in which case the fs core has already locked the superblock. This also allows us to take out a very vile unlock_super()/lock_super() pair in ext4_remount(). Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e3b35f2..45d0ada 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3219,13 +3219,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb, if (jbd2_journal_flush(journal) < 0) goto out; - lock_super(sb); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); ext4_commit_super(sb, 1); } - unlock_super(sb); out: jbd2_journal_unlock_updates(journal); @@ -3436,15 +3434,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) (sbi->s_mount_state & EXT4_VALID_FS)) es->s_state = cpu_to_le16(sbi->s_mount_state); - /* - * We have to unlock super so that we can wait for - * transactions. - */ - if (sbi->s_journal) { - unlock_super(sb); + if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); - lock_super(sb); - } } else { int ret; if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, -- cgit v0.10.2 From 3b9d4ed26680771295d904a6b83e88e620780893 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 25 Apr 2009 22:54:04 -0400 Subject: ext4: Replace lock/unlock_super() with an explicit lock for the orphan list Use a separate lock to protect the orphan list, so we can stop overloading the use of lock_super(). Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 57b71fe..4bda2f7 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h @@ -71,6 +71,7 @@ struct ext4_sb_info { struct inode *s_journal_inode; struct journal_s *s_journal; struct list_head s_orphan; + struct mutex s_orphan_lock; unsigned long s_commit_interval; u32 s_max_batch_time; u32 s_min_batch_time; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 22098e1..8018e49 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) if (!ext4_handle_valid(handle)) return 0; - lock_super(sb); + mutex_lock(&EXT4_SB(sb)->s_orphan_lock); if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; @@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* @@@ FIXME: Observation from aviro: * I think I can trigger J_ASSERT in ext4_orphan_add(). We block - * here (on lock_super()), so race with ext4_link() which might bump + * here (on s_orphan_lock), so race with ext4_link() which might bump * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. + * + * tytso, 4/25/2009: I'm not sure how that could happen; + * shouldn't the fs core protect us from these sort of + * unlink()/link() races? */ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); @@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); out_unlock: - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); ext4_std_error(inode->i_sb, err); return err; } @@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (!ext4_handle_valid(handle)) return 0; - lock_super(inode->i_sb); - if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } + mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); + if (list_empty(&ei->i_orphan)) + goto out; ino_next = NEXT_ORPHAN(inode); prev = ei->i_orphan.prev; @@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) out_err: ext4_std_error(inode->i_sb, err); out: - unlock_super(inode->i_sb); + mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); return err; out_brelse: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 45d0ada..7f43fde 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2645,6 +2645,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->dq_op = &ext4_quota_operations; #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + mutex_init(&sbi->s_orphan_lock); sb->s_root = NULL; -- cgit v0.10.2 From 32ed5058ce90024efcd811254b4b1de0468099df Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 25 Apr 2009 22:53:39 -0400 Subject: ext4: Replace lock/unlock_super() with an explicit lock for resizing Use a separate lock to protect s_groups_count and the other block group descriptors which get changed via an on-line resize operation, so we can stop overloading the use of lock_super(). Signed-off-by: "Theodore Ts'o" diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 4bda2f7..2d36223 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h @@ -72,6 +72,7 @@ struct ext4_sb_info { struct journal_s *s_journal; struct list_head s_orphan; struct mutex s_orphan_lock; + struct mutex s_resize_lock; unsigned long s_commit_interval; u32 s_max_batch_time; u32 s_min_batch_time; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 546c7dd..e8ded13 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -193,7 +193,7 @@ static int setup_new_group_blocks(struct super_block *sb, if (IS_ERR(handle)) return PTR_ERR(handle); - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { err = -EBUSY; goto exit_journal; @@ -302,7 +302,7 @@ exit_bh: brelse(bh); exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -643,11 +643,12 @@ exit_free: * important part is that the new block and inode counts are in the backup * superblocks, and the location of the new group metadata in the GDT backups. * - * We do not need lock_super() for this, because these blocks are not - * otherwise touched by the filesystem code when it is mounted. We don't - * need to worry about last changing from sbi->s_groups_count, because the - * worst that can happen is that we do not copy the full number of backups - * at this time. The resize which changed s_groups_count will backup again. + * We do not need take the s_resize_lock for this, because these + * blocks are not otherwise touched by the filesystem code when it is + * mounted. We don't need to worry about last changing from + * sbi->s_groups_count, because the worst that can happen is that we + * do not copy the full number of backups at this time. The resize + * which changed s_groups_count will backup again. */ static void update_backups(struct super_block *sb, int blk_off, char *data, int size) @@ -809,7 +810,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) goto exit_put; } - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); @@ -840,7 +841,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* * OK, now we've set up the new group. Time to make it active. * - * Current kernels don't lock all allocations via lock_super(), + * We do not lock all allocations via s_resize_lock * so we have to be safe wrt. concurrent accesses the group * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. @@ -900,12 +901,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * * The precise rules we use are: * - * * Writers of s_groups_count *must* hold lock_super + * * Writers of s_groups_count *must* hold s_resize_lock * AND * * Writers must perform a smp_wmb() after updating all dependent * data and before modifying the groups count * - * * Readers must hold lock_super() over the access + * * Readers must hold s_resize_lock over the access * OR * * Readers must perform an smp_rmb() after reading the groups count * and before reading any dependent data. @@ -948,7 +949,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) sb->s_dirt = 1; exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; if (!err) { @@ -986,7 +987,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after - * taking lock_super() below. */ + * taking the s_resize_lock below. */ o_blocks_count = ext4_blocks_count(es); o_groups_count = EXT4_SB(sb)->s_groups_count; @@ -1056,11 +1057,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } - lock_super(sb); + mutex_lock(&EXT4_SB(sb)->s_resize_lock); if (o_blocks_count != ext4_blocks_count(es)) { ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); err = -EBUSY; goto exit_put; @@ -1070,14 +1071,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, EXT4_SB(sb)->s_sbh))) { ext4_warning(sb, __func__, "error %d on journal write access", err); - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7f43fde..1fbf090 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2646,6 +2646,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); + mutex_init(&sbi->s_resize_lock); sb->s_root = NULL; -- cgit v0.10.2 From 701970b3a83cc639c1ec8fc6f40a7871cb99426f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 24 Apr 2009 23:11:22 -0400 Subject: tracing/events: make modules have their own file_operations structure For proper module reference counting, the file_operations that modules use must have the "owner" field set to the module. Unfortunately, the trace events use share file_operations. The same file_operations are used by all both kernel core and all modules. This patch makes the modules allocate their own file_operations and copies the functions from the core kernel. This allows those file operations to be owned by the module. Care is taken to free this code on module unload. Thanks to Greg KH for reminding me that file_operations must be owned by the module to have reference counting take place. [ Impact: fix modular tracepoints / potential crash ] Signed-off-by: Steven Rostedt Acked-by: Greg Kroah-Hartman diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b920815..be4d3a4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -770,7 +770,11 @@ event_subsystem_dir(const char *name, struct dentry *d_events) } static int -event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) +event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, + const struct file_operations *id, + const struct file_operations *enable, + const struct file_operations *filter, + const struct file_operations *format) { struct dentry *entry; int ret; @@ -800,11 +804,11 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) if (call->regfunc) entry = trace_create_file("enable", 0644, call->dir, call, - &ftrace_enable_fops); + enable); if (call->id) entry = trace_create_file("id", 0444, call->dir, call, - &ftrace_event_id_fops); + id); if (call->define_fields) { ret = call->define_fields(); @@ -814,7 +818,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return ret; } entry = trace_create_file("filter", 0644, call->dir, call, - &ftrace_event_filter_fops); + filter); } /* A trace may not want to export its format */ @@ -822,7 +826,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) return 0; entry = trace_create_file("format", 0444, call->dir, call, - &ftrace_event_format_fops); + format); return 0; } @@ -833,8 +837,60 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) event++) #ifdef CONFIG_MODULES + +static LIST_HEAD(ftrace_module_file_list); + +/* + * Modules must own their file_operations to keep up with + * reference counting. + */ +struct ftrace_module_file_ops { + struct list_head list; + struct module *mod; + struct file_operations id; + struct file_operations enable; + struct file_operations format; + struct file_operations filter; +}; + +static struct ftrace_module_file_ops * +trace_create_file_ops(struct module *mod) +{ + struct ftrace_module_file_ops *file_ops; + + /* + * This is a bit of a PITA. To allow for correct reference + * counting, modules must "own" their file_operations. + * To do this, we allocate the file operations that will be + * used in the event directory. + */ + + file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL); + if (!file_ops) + return NULL; + + file_ops->mod = mod; + + file_ops->id = ftrace_event_id_fops; + file_ops->id.owner = mod; + + file_ops->enable = ftrace_enable_fops; + file_ops->enable.owner = mod; + + file_ops->filter = ftrace_event_filter_fops; + file_ops->filter.owner = mod; + + file_ops->format = ftrace_event_format_fops; + file_ops->format.owner = mod; + + list_add(&file_ops->list, &ftrace_module_file_list); + + return file_ops; +} + static void trace_module_add_events(struct module *mod) { + struct ftrace_module_file_ops *file_ops = NULL; struct ftrace_event_call *call, *start, *end; struct dentry *d_events; @@ -852,14 +908,27 @@ static void trace_module_add_events(struct module *mod) /* The linker may leave blanks */ if (!call->name) continue; + + /* + * This module has events, create file ops for this module + * if not already done. + */ + if (!file_ops) { + file_ops = trace_create_file_ops(mod); + if (!file_ops) + return; + } call->mod = mod; list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events); + event_create_dir(call, d_events, + &file_ops->id, &file_ops->enable, + &file_ops->filter, &file_ops->format); } } static void trace_module_remove_events(struct module *mod) { + struct ftrace_module_file_ops *file_ops; struct ftrace_event_call *call, *p; list_for_each_entry_safe(call, p, &ftrace_events, list) { @@ -874,6 +943,16 @@ static void trace_module_remove_events(struct module *mod) list_del(&call->list); } } + + /* Now free the file_operations */ + list_for_each_entry(file_ops, &ftrace_module_file_list, list) { + if (file_ops->mod == mod) + break; + } + if (&file_ops->list != &ftrace_module_file_list) { + list_del(&file_ops->list); + kfree(file_ops); + } } static int trace_module_notify(struct notifier_block *self, @@ -954,7 +1033,9 @@ static __init int event_trace_init(void) if (!call->name) continue; list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events); + event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, &ftrace_event_filter_fops, + &ftrace_event_format_fops); } ret = register_module_notifier(&trace_module_nb); -- cgit v0.10.2 From 924a158a12c7e732179dd85ddd20848039e7bd71 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 26 Apr 2009 13:14:52 +0100 Subject: [ARM] Convert pmd_page() to be highmem safe In the long run, we may want to place page tables in highmem. However, pmd_page() has traditionally been coded to convert the physical address to a virtual one, which won't work with highmem pages. Instead, translate the physical address to a PFN, and then convert the PFN to a struct page instead. Signed-off-by: Russell King diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 110295c..1cd2d64 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -342,7 +342,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) return __va(ptr); } -#define pmd_page(pmd) virt_to_page(__va(pmd_val(pmd))) +#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) /* * Conversion functions: convert a page and protection to a page entry, -- cgit v0.10.2 From 38f7b009a6ae1708fcf0f208aba9a9a4364bcfcf Mon Sep 17 00:00:00 2001 From: Hartley Sweeten Date: Wed, 15 Apr 2009 23:18:26 +0100 Subject: [ARM] 5452/1: ep93x: rtc: use ioremap'ed addresses Update the rtc-ep93xx driver to use ioremap'ed addresses. This removes the dependency on and properly reports the memory addresses used by the driver in /proc/iomem. In addition, ep93xx_rtc_init() is updated to use platform_driver_probe() instead of platform_driver_register(). Also, the device_create_file() calls are now properly checked for error conditions. The created sysfs files are also now removed when the driver is removed. The version number for the driver has been bumped at the request of Alessandro Zummo. Signed-off-by: H Hartley Sweeten Acked-by: Alessandro Zummo Signed-off-by: Russell King diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index ae24486..c535e88 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -450,10 +450,19 @@ static struct amba_device uart3_device = { }; +static struct resource ep93xx_rtc_resource[] = { + { + .start = EP93XX_RTC_PHYS_BASE, + .end = EP93XX_RTC_PHYS_BASE + 0x10c - 1, + .flags = IORESOURCE_MEM, + }, +}; + static struct platform_device ep93xx_rtc_device = { - .name = "ep93xx-rtc", - .id = -1, - .num_resources = 0, + .name = "ep93xx-rtc", + .id = -1, + .num_resources = ARRAY_SIZE(ep93xx_rtc_resource), + .resource = ep93xx_rtc_resource, }; diff --git a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h index f66be12..78ac1bd 100644 --- a/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h +++ b/arch/arm/mach-ep93xx/include/mach/ep93xx-regs.h @@ -147,6 +147,7 @@ #define EP93XX_PWM_BASE (EP93XX_APB_VIRT_BASE + 0x00110000) #define EP93XX_RTC_BASE (EP93XX_APB_VIRT_BASE + 0x00120000) +#define EP93XX_RTC_PHYS_BASE (EP93XX_APB_PHYS_BASE + 0x00120000) #define EP93XX_SYSCON_BASE (EP93XX_APB_VIRT_BASE + 0x00130000) #define EP93XX_SYSCON_REG(x) (EP93XX_SYSCON_BASE + (x)) diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index f7a3283..551332e 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -12,32 +12,56 @@ #include #include #include -#include +#include + +#define EP93XX_RTC_DATA 0x000 +#define EP93XX_RTC_MATCH 0x004 +#define EP93XX_RTC_STATUS 0x008 +#define EP93XX_RTC_STATUS_INTR (1<<0) +#define EP93XX_RTC_LOAD 0x00C +#define EP93XX_RTC_CONTROL 0x010 +#define EP93XX_RTC_CONTROL_MIE (1<<0) +#define EP93XX_RTC_SWCOMP 0x108 +#define EP93XX_RTC_SWCOMP_DEL_MASK 0x001f0000 +#define EP93XX_RTC_SWCOMP_DEL_SHIFT 16 +#define EP93XX_RTC_SWCOMP_INT_MASK 0x0000ffff +#define EP93XX_RTC_SWCOMP_INT_SHIFT 0 + +#define DRV_VERSION "0.3" -#define EP93XX_RTC_REG(x) (EP93XX_RTC_BASE + (x)) -#define EP93XX_RTC_DATA EP93XX_RTC_REG(0x0000) -#define EP93XX_RTC_LOAD EP93XX_RTC_REG(0x000C) -#define EP93XX_RTC_SWCOMP EP93XX_RTC_REG(0x0108) - -#define DRV_VERSION "0.2" +/* + * struct device dev.platform_data is used to store our private data + * because struct rtc_device does not have a variable to hold it. + */ +struct ep93xx_rtc { + void __iomem *mmio_base; +}; -static int ep93xx_get_swcomp(struct device *dev, unsigned short *preload, +static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, unsigned short *delete) { - unsigned short comp = __raw_readl(EP93XX_RTC_SWCOMP); + struct ep93xx_rtc *ep93xx_rtc = dev->platform_data; + unsigned long comp; + + comp = __raw_readl(ep93xx_rtc->mmio_base + EP93XX_RTC_SWCOMP); if (preload) - *preload = comp & 0xffff; + *preload = (comp & EP93XX_RTC_SWCOMP_INT_MASK) + >> EP93XX_RTC_SWCOMP_INT_SHIFT; if (delete) - *delete = (comp >> 16) & 0x1f; + *delete = (comp & EP93XX_RTC_SWCOMP_DEL_MASK) + >> EP93XX_RTC_SWCOMP_DEL_SHIFT; return 0; } static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm) { - unsigned long time = __raw_readl(EP93XX_RTC_DATA); + struct ep93xx_rtc *ep93xx_rtc = dev->platform_data; + unsigned long time; + + time = __raw_readl(ep93xx_rtc->mmio_base + EP93XX_RTC_DATA); rtc_time_to_tm(time, tm); return 0; @@ -45,7 +69,9 @@ static int ep93xx_rtc_read_time(struct device *dev, struct rtc_time *tm) static int ep93xx_rtc_set_mmss(struct device *dev, unsigned long secs) { - __raw_writel(secs + 1, EP93XX_RTC_LOAD); + struct ep93xx_rtc *ep93xx_rtc = dev->platform_data; + + __raw_writel(secs + 1, ep93xx_rtc->mmio_base + EP93XX_RTC_LOAD); return 0; } @@ -53,7 +79,7 @@ static int ep93xx_rtc_proc(struct device *dev, struct seq_file *seq) { unsigned short preload, delete; - ep93xx_get_swcomp(dev, &preload, &delete); + ep93xx_rtc_get_swcomp(dev, &preload, &delete); seq_printf(seq, "preload\t\t: %d\n", preload); seq_printf(seq, "delete\t\t: %d\n", delete); @@ -67,54 +93,104 @@ static const struct rtc_class_ops ep93xx_rtc_ops = { .proc = ep93xx_rtc_proc, }; -static ssize_t ep93xx_sysfs_show_comp_preload(struct device *dev, +static ssize_t ep93xx_rtc_show_comp_preload(struct device *dev, struct device_attribute *attr, char *buf) { unsigned short preload; - ep93xx_get_swcomp(dev, &preload, NULL); + ep93xx_rtc_get_swcomp(dev, &preload, NULL); return sprintf(buf, "%d\n", preload); } -static DEVICE_ATTR(comp_preload, S_IRUGO, ep93xx_sysfs_show_comp_preload, NULL); +static DEVICE_ATTR(comp_preload, S_IRUGO, ep93xx_rtc_show_comp_preload, NULL); -static ssize_t ep93xx_sysfs_show_comp_delete(struct device *dev, +static ssize_t ep93xx_rtc_show_comp_delete(struct device *dev, struct device_attribute *attr, char *buf) { unsigned short delete; - ep93xx_get_swcomp(dev, NULL, &delete); + ep93xx_rtc_get_swcomp(dev, NULL, &delete); return sprintf(buf, "%d\n", delete); } -static DEVICE_ATTR(comp_delete, S_IRUGO, ep93xx_sysfs_show_comp_delete, NULL); +static DEVICE_ATTR(comp_delete, S_IRUGO, ep93xx_rtc_show_comp_delete, NULL); -static int __devinit ep93xx_rtc_probe(struct platform_device *dev) +static int __init ep93xx_rtc_probe(struct platform_device *pdev) { - struct rtc_device *rtc = rtc_device_register("ep93xx", - &dev->dev, &ep93xx_rtc_ops, THIS_MODULE); + struct ep93xx_rtc *ep93xx_rtc; + struct resource *res; + struct rtc_device *rtc; + int err; + + ep93xx_rtc = kzalloc(sizeof(struct ep93xx_rtc), GFP_KERNEL); + if (ep93xx_rtc == NULL) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) + return -ENXIO; + + res = request_mem_region(res->start, resource_size(res), pdev->name); + if (res == NULL) + return -EBUSY; + + ep93xx_rtc->mmio_base = ioremap(res->start, resource_size(res)); + if (ep93xx_rtc->mmio_base == NULL) { + err = -ENXIO; + goto fail; + } + pdev->dev.platform_data = ep93xx_rtc; + + rtc = rtc_device_register(pdev->name, + &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { - return PTR_ERR(rtc); + err = PTR_ERR(rtc); + goto fail; } - platform_set_drvdata(dev, rtc); + platform_set_drvdata(pdev, rtc); - device_create_file(&dev->dev, &dev_attr_comp_preload); - device_create_file(&dev->dev, &dev_attr_comp_delete); + err = device_create_file(&pdev->dev, &dev_attr_comp_preload); + if (err) + goto fail; + err = device_create_file(&pdev->dev, &dev_attr_comp_delete); + if (err) { + device_remove_file(&pdev->dev, &dev_attr_comp_preload); + goto fail; + } return 0; + +fail: + if (ep93xx_rtc->mmio_base) { + iounmap(ep93xx_rtc->mmio_base); + pdev->dev.platform_data = NULL; + } + release_mem_region(res->start, resource_size(res)); + return err; } -static int __devexit ep93xx_rtc_remove(struct platform_device *dev) +static int __exit ep93xx_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(dev); + struct rtc_device *rtc = platform_get_drvdata(pdev); + struct ep93xx_rtc *ep93xx_rtc = pdev->dev.platform_data; + struct resource *res; + + /* cleanup sysfs */ + device_remove_file(&pdev->dev, &dev_attr_comp_delete); + device_remove_file(&pdev->dev, &dev_attr_comp_preload); + + rtc_device_unregister(rtc); + + iounmap(ep93xx_rtc->mmio_base); + pdev->dev.platform_data = NULL; - if (rtc) - rtc_device_unregister(rtc); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(res->start, resource_size(res)); - platform_set_drvdata(dev, NULL); + platform_set_drvdata(pdev, NULL); return 0; } @@ -122,23 +198,22 @@ static int __devexit ep93xx_rtc_remove(struct platform_device *dev) /* work with hotplug and coldplug */ MODULE_ALIAS("platform:ep93xx-rtc"); -static struct platform_driver ep93xx_rtc_platform_driver = { +static struct platform_driver ep93xx_rtc_driver = { .driver = { .name = "ep93xx-rtc", .owner = THIS_MODULE, }, - .probe = ep93xx_rtc_probe, - .remove = __devexit_p(ep93xx_rtc_remove), + .remove = __exit_p(ep93xx_rtc_remove), }; static int __init ep93xx_rtc_init(void) { - return platform_driver_register(&ep93xx_rtc_platform_driver); + return platform_driver_probe(&ep93xx_rtc_driver, ep93xx_rtc_probe); } static void __exit ep93xx_rtc_exit(void) { - platform_driver_unregister(&ep93xx_rtc_platform_driver); + platform_driver_unregister(&ep93xx_rtc_driver); } MODULE_AUTHOR("Alessandro Zummo "); -- cgit v0.10.2 From 0a3ec21fcd311b26ab0f249d62960e127bc20ca8 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Sun, 26 Apr 2009 23:07:42 +0200 Subject: x86: beautify vmlinux_64.lds.S Beautify vmlinux_64.lds.S: - Use tabs for indent - Located curly braces like in C code - Rearranged a few comments There is no functional changes in this patch The beautification is done to prepare a unification of the _32 and the _64 variants of the linker scripts. [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <20090426210742.GA3464@uranus.ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index c874250..6d5a5b0 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -15,69 +15,79 @@ OUTPUT_ARCH(i386:x86-64) ENTRY(phys_startup_64) jiffies_64 = jiffies; PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - user PT_LOAD FLAGS(7); /* RWE */ + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + user PT_LOAD FLAGS(7); /* RWE */ data.init PT_LOAD FLAGS(7); /* RWE */ #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(7); /* RWE */ #endif data.init2 PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ + note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS { - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; /* Text and read-only data */ - /* First the code that has to be first for bootstrapping */ - *(.text.head) - _stext = .; - /* Then the rest */ - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - _etext = .; /* End of text section */ - } :text = 0x9090 - - NOTES :text :note - - . = ALIGN(16); /* Exception table */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - - . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - DATA_DATA - CONSTRUCTORS - _edata = .; /* End of data section */ - } :data + . = __START_KERNEL; + phys_startup_64 = startup_64 - LOAD_OFFSET; + + /* Text and read-only data */ + .text : AT(ADDR(.text) - LOAD_OFFSET) { + _text = .; + /* First the code that has to be first for bootstrapping */ + *(.text.head) + _stext = .; + /* Then the rest */ + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + /* End of text section */ + _etext = .; + } :text = 0x9090 + + NOTES :text :note + + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } :text = 0x9090 + RODATA - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + /* Align data segment to page size boundary */ . = ALIGN(PAGE_SIZE); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - *(.data.cacheline_aligned) - } - . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - } + /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS + /* End of data section */ + _edata = .; + } :data + + + .data.cacheline_aligned : + AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + *(.data.cacheline_aligned) + } + + . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + } #define VSYSCALL_ADDR (-10*1024*1024) -#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) -#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) @@ -85,37 +95,53 @@ SECTIONS #define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) - . = VSYSCALL_ADDR; - .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user - __vsyscall_0 = VSYSCALL_VIRT_ADDR; + . = VSYSCALL_ADDR; + .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { + *(.vsyscall_0) + } :user + + __vsyscall_0 = VSYSCALL_VIRT_ADDR; + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { + *(.vsyscall_fn) + } + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { + *(.vsyscall_gtod_data) + } - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) - { *(.vsyscall_gtod_data) } - vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); - .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) - { *(.vsyscall_clock) } - vsyscall_clock = VVIRT(.vsyscall_clock); + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) { + *(.vsyscall_clock) + } + vsyscall_clock = VVIRT(.vsyscall_clock); - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) - { *(.vsyscall_1) } - .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) - { *(.vsyscall_2) } + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { + *(.vsyscall_1) + } + .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { + *(.vsyscall_2) + } - .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } - vgetcpu_mode = VVIRT(.vgetcpu_mode); + .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { + *(.vgetcpu_mode) + } + vgetcpu_mode = VVIRT(.vgetcpu_mode); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } - jiffies = VVIRT(.jiffies); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .jiffies : AT(VLOAD(.jiffies)) { + *(.jiffies) + } + jiffies = VVIRT(.jiffies); - .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) - { *(.vsyscall_3) } + .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { + *(.vsyscall_3) + } - . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; + . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; #undef VSYSCALL_ADDR #undef VSYSCALL_PHYS_ADDR @@ -125,156 +151,168 @@ SECTIONS #undef VVIRT_OFFSET #undef VVIRT - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - . = ALIGN(THREAD_SIZE); /* init_task */ - *(.data.init_task) - }:data.init + /* init_task */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + . = ALIGN(THREAD_SIZE); + *(.data.init_task) + } :data.init - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - *(.data.page_aligned) - } + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + *(.data.page_aligned) + } - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - __smp_alt_begin = .; - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + /* might get freed after init */ + . = ALIGN(PAGE_SIZE); + __smp_alt_begin = .; + __smp_locks = .; + *(.smp_locks) + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + __smp_alt_end = .; + } + + /* Init code and data */ . = ALIGN(PAGE_SIZE); - __smp_alt_end = .; - } - - . = ALIGN(PAGE_SIZE); /* Init code and data */ - __init_begin = .; /* paired with __init_end */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - __initdata_begin = .; - INIT_DATA - __initdata_end = .; - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - . = ALIGN(16); - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - SECURITY_INIT - - . = ALIGN(8); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __init_begin = .; /* paired with __init_end */ + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + _sinittext = .; + INIT_TEXT + _einittext = .; + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + __initdata_begin = .; + INIT_DATA + __initdata_end = .; + } + + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + . = ALIGN(16); + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; + INITCALLS + __initcall_end = .; + } + + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; + } + + SECURITY_INIT + . = ALIGN(8); - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { + __parainstructions = .; + *(.parainstructions) + __parainstructions_end = .; + } + + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + . = ALIGN(8); + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } + + /* + * .exit.text is discard at runtime, not link time, to deal with + * references from .altinstructions and .eh_frame + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } #ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } + . = ALIGN(PAGE_SIZE); + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } #endif #ifdef CONFIG_SMP - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - __data_nosave - should - * start another section data.init2. Also, pda should be at the head of - * percpu area. Preallocate it and define the percpu offset symbol - * so that it can be accessed as a percpu variable. - */ - . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * start another section data.init2. Also, pda should be at the head of + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) #else - PERCPU(PAGE_SIZE) + PERCPU(PAGE_SIZE) #endif - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ - - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { . = ALIGN(PAGE_SIZE); - __bss_start = .; /* BSS */ - *(.bss.page_aligned) - *(.bss) - __bss_stop = .; - } + __init_end = .; + + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } :data.init2 + /* use another section data.init2, see PERCPU_VADDR() above */ + + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __bss_start = .; /* BSS */ + *(.bss.page_aligned) + *(.bss) + __bss_stop = .; + } - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __brk_base = . ; - . += 64 * 1024 ; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = . ; - } - - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __brk_base = .; + . += 64 * 1024; /* 64k alignment slop space */ + *(.brk_reservation) /* areas brk users have reserved */ + __brk_limit = .; } - STABS_DEBUG + _end = . ; - DWARF_DEBUG + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) + *(.eh_frame) + *(.discard) + } + + STABS_DEBUG + DWARF_DEBUG } - /* - * Per-cpu symbols which need to be offset from __per_cpu_load - * for the boot processor. - */ +/* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load INIT_PER_CPU(gdt_page); INIT_PER_CPU(irq_stack_union); -- cgit v0.10.2 From 51b26ada79b605ed709ddcedbb6012e8f8e0ebed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Apr 2009 10:12:47 -0700 Subject: x86: unify arch/x86/boot/compressed/vmlinux_*.lds Look at the: diff -u arch/x86/boot/compressed/vmlinux_*.lds output and realize that they're basially exactly the same except for trivial naming differences, and the fact that the 64-bit version has a "pgtable" thing. So unify them. There's some trivial cleanup there (make the output format a Kconfig thing rather than doing #ifdef's for it, and unify both 32-bit and 64-bit BSS end to "_ebss", where 32-bit used to use the traditional "_end"), but other than that it's really very mindless and straigt conversion. For example, I think we should aim to remove "startup_32" vs "startup_64", and just call it "startup", and get rid of one more difference. I didn't do that. Also, notice the comment in the unified vmlinux.lds.S talks about "head_64" and "startup_32" which is an odd and incorrect mix, but that was actually what the old 64-bit only lds file had, so the confusion isn't new, and now that mixing is arguably more accurate thanks to the vmlinux.lds.S file being shared between the two cases ;) [ Impact: cleanup, unification ] Signed-off-by: Linus Torvalds Acked-by: Sam Ravnborg Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bc25b9f..039c3f0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -47,6 +47,11 @@ config X86 select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA +config OUTPUT_FORMAT + string + default "elf32-i386" if X86_32 + default "elf64-x86-64" if X86_64 + config ARCH_DEFCONFIG string default "arch/x86/configs/i386_defconfig" if X86_32 diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 65551c9..0f4b5e2 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -19,7 +19,7 @@ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ LDFLAGS := -m elf_$(UTS_MACHINE) LDFLAGS_vmlinux := -T -$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE +$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE $(call if_changed,ld) @: diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 3a8a866..85bd328 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -88,9 +88,9 @@ ENTRY(startup_32) * where decompression in place becomes safe. */ pushl %esi - leal _end(%ebp), %esi - leal _end(%ebx), %edi - movl $(_end - startup_32), %ecx + leal _ebss(%ebp), %esi + leal _ebss(%ebx), %edi + movl $(_ebss - startup_32), %ecx std rep movsb @@ -121,7 +121,7 @@ relocated: */ xorl %eax,%eax leal _edata(%ebx),%edi - leal _end(%ebx), %ecx + leal _ebss(%ebx), %ecx subl %edi,%ecx cld rep diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S new file mode 100644 index 0000000..ffcb1913 --- /dev/null +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -0,0 +1,57 @@ +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) + +#ifdef CONFIG_X86_64 +OUTPUT_ARCH(i386:x86-64) +ENTRY(startup_64) +#else +OUTPUT_ARCH(i386) +ENTRY(startup_32) +#endif + +SECTIONS +{ + /* Be careful parts of head_64.S assume startup_32 is at + * address 0. + */ + . = 0; + .text.head : { + _head = . ; + *(.text.head) + _ehead = . ; + } + .rodata.compressed : { + *(.rodata.compressed) + } + .text : { + _text = .; /* Text */ + *(.text) + *(.text.*) + _etext = . ; + } + .rodata : { + _rodata = . ; + *(.rodata) /* read-only data */ + *(.rodata.*) + _erodata = . ; + } + .data : { + _data = . ; + *(.data) + *(.data.*) + _edata = . ; + } + .bss : { + _bss = . ; + *(.bss) + *(.bss.*) + *(COMMON) +#ifdef CONFIG_X86_64 + . = ALIGN(8); + _end_before_pgt = . ; + . = ALIGN(4096); + pgtable = . ; + . = . + 4096 * 6; +#endif + _ebss = .; + } +} diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds deleted file mode 100644 index bb3c483..0000000 --- a/arch/x86/boot/compressed/vmlinux_32.lds +++ /dev/null @@ -1,43 +0,0 @@ -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(startup_32) -SECTIONS -{ - /* Be careful parts of head_32.S assume startup_32 is at - * address 0. - */ - . = 0; - .text.head : { - _head = . ; - *(.text.head) - _ehead = . ; - } - .rodata.compressed : { - *(.rodata.compressed) - } - .text : { - _text = .; /* Text */ - *(.text) - *(.text.*) - _etext = . ; - } - .rodata : { - _rodata = . ; - *(.rodata) /* read-only data */ - *(.rodata.*) - _erodata = . ; - } - .data : { - _data = . ; - *(.data) - *(.data.*) - _edata = . ; - } - .bss : { - _bss = . ; - *(.bss) - *(.bss.*) - *(COMMON) - _end = . ; - } -} diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds deleted file mode 100644 index bef1ac8..0000000 --- a/arch/x86/boot/compressed/vmlinux_64.lds +++ /dev/null @@ -1,48 +0,0 @@ -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) -ENTRY(startup_64) -SECTIONS -{ - /* Be careful parts of head_64.S assume startup_32 is at - * address 0. - */ - . = 0; - .text.head : { - _head = . ; - *(.text.head) - _ehead = . ; - } - .rodata.compressed : { - *(.rodata.compressed) - } - .text : { - _text = .; /* Text */ - *(.text) - *(.text.*) - _etext = . ; - } - .rodata : { - _rodata = . ; - *(.rodata) /* read-only data */ - *(.rodata.*) - _erodata = . ; - } - .data : { - _data = . ; - *(.data) - *(.data.*) - _edata = . ; - } - .bss : { - _bss = . ; - *(.bss) - *(.bss.*) - *(COMMON) - . = ALIGN(8); - _end_before_pgt = . ; - . = ALIGN(4096); - pgtable = . ; - . = . + 4096 * 6; - _ebss = .; - } -} -- cgit v0.10.2 From fc4967b8c6a1540ebce9ac48e44b8d44e7fac971 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 27 Apr 2009 14:06:26 +0900 Subject: sh: update defconfigs for PCI changes. Signed-off-by: Paul Mundt diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index c8d982a..022f70e 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 17:46:53 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:42:06 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -74,6 +75,7 @@ CONFIG_EMBEDDED=y CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y # CONFIG_KALLSYMS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -92,12 +94,15 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -110,7 +115,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -159,6 +163,7 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set CONFIG_CPU_SUBTYPE_SH7723=y +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -168,8 +173,6 @@ CONFIG_CPU_SUBTYPE_SH7723=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -573,6 +576,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -695,6 +699,7 @@ CONFIG_DEVKMEM=y # # Non-8250 serial port support # +# CONFIG_SERIAL_MAX3100 is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=6 CONFIG_SERIAL_SH_SCI_CONSOLE=y @@ -1030,6 +1035,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y @@ -1052,6 +1058,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1100,6 +1111,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1191,10 +1203,24 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1303,6 +1329,7 @@ CONFIG_CRYPTO_CBC=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/cayman_defconfig b/arch/sh/configs/cayman_defconfig index fa5fc1e..40301f8 100644 --- a/arch/sh/configs/cayman_defconfig +++ b/arch/sh/configs/cayman_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 17:49:14 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:42:53 2009 # CONFIG_SUPERH=y # CONFIG_SUPERH32 is not set @@ -40,6 +40,7 @@ CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y # CONFIG_SYSVIPC is not set CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set @@ -70,6 +71,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -89,10 +91,13 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -105,7 +110,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -127,39 +131,6 @@ CONFIG_DEFAULT_IOSCHED="cfq" # System type # CONFIG_CPU_SH5=y -# CONFIG_CPU_SUBTYPE_SH7619 is not set -# CONFIG_CPU_SUBTYPE_SH7201 is not set -# CONFIG_CPU_SUBTYPE_SH7203 is not set -# CONFIG_CPU_SUBTYPE_SH7206 is not set -# CONFIG_CPU_SUBTYPE_SH7263 is not set -# CONFIG_CPU_SUBTYPE_MXG is not set -# CONFIG_CPU_SUBTYPE_SH7705 is not set -# CONFIG_CPU_SUBTYPE_SH7706 is not set -# CONFIG_CPU_SUBTYPE_SH7707 is not set -# CONFIG_CPU_SUBTYPE_SH7708 is not set -# CONFIG_CPU_SUBTYPE_SH7709 is not set -# CONFIG_CPU_SUBTYPE_SH7710 is not set -# CONFIG_CPU_SUBTYPE_SH7712 is not set -# CONFIG_CPU_SUBTYPE_SH7720 is not set -# CONFIG_CPU_SUBTYPE_SH7721 is not set -# CONFIG_CPU_SUBTYPE_SH7750 is not set -# CONFIG_CPU_SUBTYPE_SH7091 is not set -# CONFIG_CPU_SUBTYPE_SH7750R is not set -# CONFIG_CPU_SUBTYPE_SH7750S is not set -# CONFIG_CPU_SUBTYPE_SH7751 is not set -# CONFIG_CPU_SUBTYPE_SH7751R is not set -# CONFIG_CPU_SUBTYPE_SH7760 is not set -# CONFIG_CPU_SUBTYPE_SH4_202 is not set -# CONFIG_CPU_SUBTYPE_SH7723 is not set -# CONFIG_CPU_SUBTYPE_SH7763 is not set -# CONFIG_CPU_SUBTYPE_SH7770 is not set -# CONFIG_CPU_SUBTYPE_SH7780 is not set -# CONFIG_CPU_SUBTYPE_SH7785 is not set -# CONFIG_CPU_SUBTYPE_SH7786 is not set -# CONFIG_CPU_SUBTYPE_SHX3 is not set -# CONFIG_CPU_SUBTYPE_SH7343 is not set -# CONFIG_CPU_SUBTYPE_SH7722 is not set -# CONFIG_CPU_SUBTYPE_SH7366 is not set CONFIG_CPU_SUBTYPE_SH5_101=y # CONFIG_CPU_SUBTYPE_SH5_103 is not set @@ -279,8 +250,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000 # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -492,6 +461,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -568,6 +538,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -591,6 +562,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -779,6 +751,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_F71882FG is not set # CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set # CONFIG_SENSORS_GL518SM is not set # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_IT87 is not set @@ -1042,7 +1015,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1082,6 +1054,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1105,6 +1078,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1146,8 +1124,13 @@ CONFIG_MINIX_FS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1208,6 +1191,9 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y CONFIG_SCHEDSTATS=y # CONFIG_TIMER_STATS is not set @@ -1241,15 +1227,21 @@ CONFIG_FRAME_POINTER=y # CONFIG_LATENCYTOP is not set # CONFIG_SYSCTL_SYSCALL_CHECK is not set # CONFIG_PAGE_POISONING is not set +CONFIG_TRACING_SUPPORT=y # # Tracers # # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set # CONFIG_EARLY_SCIF_CONSOLE is not set # CONFIG_DEBUG_BOOTMEM is not set @@ -1354,6 +1346,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/dreamcast_defconfig b/arch/sh/configs/dreamcast_defconfig index 5c11236..1f3cc98 100644 --- a/arch/sh/configs/dreamcast_defconfig +++ b/arch/sh/configs/dreamcast_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 17:51:48 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:44:27 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -71,6 +72,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -90,6 +92,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set CONFIG_PROFILING=y +# CONFIG_MARKERS is not set # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set @@ -98,6 +101,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -110,7 +115,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -156,6 +160,7 @@ CONFIG_CPU_SUBTYPE_SH7091=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -165,8 +170,6 @@ CONFIG_CPU_SUBTYPE_SH7091=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -271,7 +274,7 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_SH_DMA_API=y CONFIG_SH_DMA=y CONFIG_SH_DMA_IRQ_MULTI=y -CONFIG_NR_ONCHIP_DMA_CHANNELS=6 +CONFIG_NR_ONCHIP_DMA_CHANNELS=4 CONFIG_NR_DMA_CHANNELS_BOOL=y CONFIG_NR_DMA_CHANNELS=9 # CONFIG_PVR2_DMA is not set @@ -320,7 +323,6 @@ CONFIG_CMDLINE="console=ttySC1,115200 panic=3" CONFIG_MAPLE=y CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -602,6 +604,7 @@ CONFIG_INPUT_MOUSE=y # CONFIG_MOUSE_APPLETOUCH is not set # CONFIG_MOUSE_BCM5974 is not set # CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_MOUSE_MAPLE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set @@ -812,7 +815,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -867,6 +869,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -910,6 +917,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -947,10 +955,24 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1051,6 +1073,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/edosk7705_defconfig b/arch/sh/configs/edosk7705_defconfig index f4c34b0..d709245 100644 --- a/arch/sh/configs/edosk7705_defconfig +++ b/arch/sh/configs/edosk7705_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 17:54:02 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:45:04 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_FIND_NEXT_BIT=y @@ -56,6 +57,7 @@ CONFIG_EMBEDDED=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set # CONFIG_PRINTK is not set # CONFIG_BUG is not set @@ -74,12 +76,15 @@ CONFIG_SHMEM=y CONFIG_SLUB=y # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_BASE_SMALL=1 # CONFIG_MODULES is not set @@ -114,6 +119,7 @@ CONFIG_CPU_SUBTYPE_SH7705=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -123,8 +129,6 @@ CONFIG_CPU_SUBTYPE_SH7705=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -382,6 +386,10 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_FUSE_FS is not set # +# Caches +# + +# # Pseudo filesystems # # CONFIG_PROC_FS is not set @@ -409,10 +417,22 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -426,6 +446,7 @@ CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index 7825c26..a822b1d 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 17:54:57 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:45:25 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -40,6 +41,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -67,7 +69,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -77,6 +78,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -96,6 +98,7 @@ CONFIG_COMPAT_BRK=y CONFIG_SLUB=y # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -103,6 +106,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -115,7 +120,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -161,6 +165,7 @@ CONFIG_CPU_SH4=y CONFIG_CPU_SUBTYPE_SH7760=y # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -170,8 +175,6 @@ CONFIG_CPU_SUBTYPE_SH7760=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -815,6 +818,7 @@ CONFIG_EXT2_FS_XATTR=y # CONFIG_EXT2_FS_SECURITY is not set CONFIG_EXT2_FS_XIP=y CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -839,6 +843,11 @@ CONFIG_INOTIFY_USER=y CONFIG_GENERIC_ACL=y # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -883,6 +892,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -964,6 +974,9 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 # CONFIG_SCHED_DEBUG is not set # CONFIG_SCHEDSTATS is not set CONFIG_TIMER_STATS=y @@ -1001,6 +1014,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1010,9 +1024,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1126,6 +1145,7 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig index ebb4c37..c5b5007 100644 --- a/arch/sh/configs/espt_defconfig +++ b/arch/sh/configs/espt_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 17:58:18 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:46:26 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -78,6 +79,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -106,6 +108,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -117,7 +121,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -164,6 +167,7 @@ CONFIG_CPU_SH4A=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set CONFIG_CPU_SUBTYPE_SH7763=y # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -173,8 +177,6 @@ CONFIG_CPU_SUBTYPE_SH7763=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -548,6 +550,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -919,6 +922,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -943,6 +947,11 @@ CONFIG_AUTOFS4_FS=y CONFIG_GENERIC_ACL=y # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -985,8 +994,13 @@ CONFIG_CRAMFS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -1075,11 +1089,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1179,6 +1207,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/hp6xx_defconfig b/arch/sh/configs/hp6xx_defconfig index 82b113a..8e13027 100644 --- a/arch/sh/configs/hp6xx_defconfig +++ b/arch/sh/configs/hp6xx_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:01:05 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:47:15 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -67,6 +68,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -85,12 +87,15 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -98,7 +103,6 @@ CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -144,6 +148,7 @@ CONFIG_CPU_SUBTYPE_SH7709=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -153,8 +158,6 @@ CONFIG_CPU_SUBTYPE_SH7709=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -385,6 +388,7 @@ CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set # CONFIG_SCSI_DH is not set @@ -431,6 +435,7 @@ CONFIG_KEYBOARD_HP6XX=y # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_AD7879 is not set # CONFIG_TOUCHSCREEN_FUJITSU is not set # CONFIG_TOUCHSCREEN_GUNZE is not set # CONFIG_TOUCHSCREEN_ELO is not set @@ -674,6 +679,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -719,6 +729,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set # # Partition Types @@ -786,10 +797,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -898,6 +922,7 @@ CONFIG_CRYPTO_MD5=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index b6fa4a7..7f549ae 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:02:54 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:47:48 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -69,6 +70,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -88,6 +90,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -95,6 +98,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -107,7 +112,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -153,6 +157,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -162,8 +167,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -292,8 +295,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000 # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -602,6 +603,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -706,6 +708,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -729,6 +732,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -922,6 +926,7 @@ CONFIG_VIDEO_HELPER_CHIPS_AUTO=y # CONFIG_SOC_CAMERA is not set CONFIG_V4L_USB_DRIVERS=y # CONFIG_USB_VIDEO_CLASS is not set +CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y # CONFIG_USB_GSPCA is not set # CONFIG_VIDEO_HDPVR is not set CONFIG_VIDEO_USBVIDEO=m @@ -994,15 +999,17 @@ CONFIG_USB_HID=m # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=m CONFIG_HID_APPLE=m CONFIG_HID_BELKIN=m CONFIG_HID_CHERRY=m CONFIG_HID_CHICONY=m CONFIG_HID_CYPRESS=m +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=m +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=m +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=m # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1197,6 +1204,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1222,6 +1230,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=m @@ -1270,10 +1283,15 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set CONFIG_UFS_FS=m # CONFIG_UFS_FS_WRITE is not set # CONFIG_UFS_DEBUG is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=m CONFIG_NFS_V3=y @@ -1364,10 +1382,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y CONFIG_SH_STANDARD_BIOS=y @@ -1469,6 +1500,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/lboxre2_defconfig b/arch/sh/configs/lboxre2_defconfig index 92c515c..a7db539 100644 --- a/arch/sh/configs/lboxre2_defconfig +++ b/arch/sh/configs/lboxre2_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:06:51 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:48:54 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -69,6 +70,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y CONFIG_KALLSYMS_EXTRA_PASS=y +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -88,6 +90,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -95,6 +98,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -107,7 +112,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -153,6 +157,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -162,8 +167,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -293,8 +296,6 @@ CONFIG_CMDLINE="console=ttySC1,115200 root=/dev/sda1" # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -542,6 +543,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -702,6 +704,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -725,6 +728,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -931,7 +935,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1007,6 +1010,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1029,6 +1033,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1073,8 +1082,13 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -1151,10 +1165,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y CONFIG_SH_STANDARD_BIOS=y @@ -1256,6 +1283,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/magicpanelr2_defconfig b/arch/sh/configs/magicpanelr2_defconfig index 26586c2..58bec61 100644 --- a/arch/sh/configs/magicpanelr2_defconfig +++ b/arch/sh/configs/magicpanelr2_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:07:39 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:49:32 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -39,6 +40,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y # CONFIG_TASKSTATS is not set @@ -66,7 +68,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -76,6 +77,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -94,6 +96,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -101,6 +104,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -113,7 +118,6 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -159,6 +163,7 @@ CONFIG_CPU_SUBTYPE_SH7720=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -168,8 +173,6 @@ CONFIG_CPU_SUBTYPE_SH7720=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -813,6 +816,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set # CONFIG_EXT4_FS is not set CONFIG_JBD=y @@ -831,6 +835,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -884,6 +893,7 @@ CONFIG_JFFS2_RTIME=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -965,6 +975,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set # CONFIG_DETECT_SOFTLOCKUP is not set +# CONFIG_DETECT_HUNG_TASK is not set # CONFIG_SCHED_DEBUG is not set # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1000,6 +1011,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1008,9 +1020,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1035,6 +1052,7 @@ CONFIG_DUMP_CODE=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/microdev_defconfig b/arch/sh/configs/microdev_defconfig index 7517835..2886fc8 100644 --- a/arch/sh/configs/microdev_defconfig +++ b/arch/sh/configs/microdev_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:11:13 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:50:51 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -65,7 +66,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -74,6 +74,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -92,12 +93,15 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -105,7 +109,6 @@ CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -151,6 +154,7 @@ CONFIG_CPU_SH4=y # CONFIG_CPU_SUBTYPE_SH7760 is not set CONFIG_CPU_SUBTYPE_SH4_202=y # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -160,8 +164,6 @@ CONFIG_CPU_SUBTYPE_SH4_202=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -647,6 +649,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -669,6 +672,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -715,6 +723,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -802,10 +811,24 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -914,6 +937,7 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index a8720f9..8ecceb4 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:14:03 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:51:34 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -67,7 +68,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -76,6 +76,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -104,6 +105,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -115,7 +118,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -165,6 +167,7 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -174,8 +177,6 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7343 is not set CONFIG_CPU_SUBTYPE_SH7722=y # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -577,6 +578,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -873,7 +875,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_OHCI is not set @@ -1012,6 +1013,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1056,6 +1062,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -1105,11 +1112,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1218,6 +1239,7 @@ CONFIG_CRYPTO_WORKQUEUE=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/polaris_defconfig b/arch/sh/configs/polaris_defconfig index df2d177..2b95072 100644 --- a/arch/sh/configs/polaris_defconfig +++ b/arch/sh/configs/polaris_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:16:48 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:52:19 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -40,6 +41,7 @@ CONFIG_LOCALVERSION="" CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y # CONFIG_TASKSTATS is not set @@ -76,6 +78,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -94,6 +97,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -101,6 +105,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -113,7 +119,6 @@ CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -159,6 +164,7 @@ CONFIG_CPU_SUBTYPE_SH7709=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -168,8 +174,6 @@ CONFIG_CPU_SUBTYPE_SH7709=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -778,6 +782,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -831,6 +840,7 @@ CONFIG_JFFS2_RTIME=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -874,6 +884,9 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 # CONFIG_SCHED_DEBUG is not set # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -914,6 +927,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -923,9 +937,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -950,6 +969,7 @@ CONFIG_DUMP_CODE=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/r7780mp_defconfig b/arch/sh/configs/r7780mp_defconfig index 7def4df..68e5eff 100644 --- a/arch/sh/configs/r7780mp_defconfig +++ b/arch/sh/configs/r7780mp_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:20:17 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:53:28 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -78,6 +79,7 @@ CONFIG_UID16=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -107,6 +109,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_BASE_SMALL=0 @@ -118,7 +122,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -165,6 +168,7 @@ CONFIG_CPU_SH4A=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set CONFIG_CPU_SUBTYPE_SH7780=y @@ -174,8 +178,6 @@ CONFIG_CPU_SUBTYPE_SH7780=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -313,8 +315,6 @@ CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1" # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -536,6 +536,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -696,6 +697,7 @@ CONFIG_E1000=m # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -719,6 +721,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -920,6 +923,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_F71882FG is not set # CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set # CONFIG_SENSORS_GL518SM is not set # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_IT87 is not set @@ -1027,7 +1031,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1120,6 +1123,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1143,6 +1147,11 @@ CONFIG_INOTIFY_USER=y CONFIG_FUSE_FS=m # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1191,6 +1200,7 @@ CONFIG_MINIX_FS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1279,6 +1289,9 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1316,6 +1329,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1325,11 +1339,16 @@ CONFIG_TRACING=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1449,6 +1468,7 @@ CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/r7785rp_defconfig b/arch/sh/configs/r7785rp_defconfig index cb134ff..890fe3c 100644 --- a/arch/sh/configs/r7785rp_defconfig +++ b/arch/sh/configs/r7785rp_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:24:35 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:55:10 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -43,6 +44,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -78,6 +80,7 @@ CONFIG_UID16=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -108,6 +111,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -120,7 +125,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -168,6 +172,7 @@ CONFIG_CPU_SHX2=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -177,8 +182,6 @@ CONFIG_CPU_SUBTYPE_SH7785=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -337,8 +340,6 @@ CONFIG_CMDLINE="console=ttySC0,115200 root=/dev/sda1" # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set # CONFIG_PCI_LEGACY is not set @@ -561,6 +562,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -698,6 +700,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -721,6 +724,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -948,6 +952,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_F71805F is not set # CONFIG_SENSORS_F71882FG is not set # CONFIG_SENSORS_F75375S is not set +# CONFIG_SENSORS_G760A is not set # CONFIG_SENSORS_GL518SM is not set # CONFIG_SENSORS_GL520SM is not set # CONFIG_SENSORS_IT87 is not set @@ -970,6 +975,7 @@ CONFIG_HWMON=y # CONFIG_SENSORS_PC87360 is not set # CONFIG_SENSORS_PC87427 is not set # CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_SHT15 is not set # CONFIG_SENSORS_SIS5595 is not set # CONFIG_SENSORS_DME1737 is not set # CONFIG_SENSORS_SMSC47M1 is not set @@ -1109,7 +1115,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1202,6 +1207,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1225,6 +1231,11 @@ CONFIG_INOTIFY_USER=y CONFIG_FUSE_FS=m # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1273,6 +1284,7 @@ CONFIG_MINIX_FS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1359,6 +1371,7 @@ CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set # CONFIG_DETECT_SOFTLOCKUP is not set +# CONFIG_DETECT_HUNG_TASK is not set CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1401,6 +1414,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1410,11 +1424,16 @@ CONFIG_TRACING=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1534,6 +1553,7 @@ CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/rsk7201_defconfig b/arch/sh/configs/rsk7201_defconfig index a037c74..fa43957 100644 --- a/arch/sh/configs/rsk7201_defconfig +++ b/arch/sh/configs/rsk7201_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:29:08 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:56:29 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -65,7 +66,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -74,6 +74,7 @@ CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -100,6 +101,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 @@ -110,7 +113,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -157,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7201=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -166,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7201=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -603,6 +604,11 @@ CONFIG_EXT2_FS=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -651,8 +657,13 @@ CONFIG_JFFS2_RTIME=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set # # Partition Types @@ -686,11 +697,24 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -705,6 +729,7 @@ CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig index 9ae28e8..e3a65f8 100644 --- a/arch/sh/configs/rsk7203_defconfig +++ b/arch/sh/configs/rsk7203_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:30:34 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:57:06 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -39,6 +40,7 @@ CONFIG_LOCALVERSION="" CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -70,7 +72,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -80,6 +81,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -106,6 +108,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 @@ -116,7 +120,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -163,6 +166,7 @@ CONFIG_CPU_SUBTYPE_SH7203=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -172,8 +176,6 @@ CONFIG_CPU_SUBTYPE_SH7203=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -750,15 +752,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -874,6 +878,7 @@ CONFIG_LEDS_CLASS=y # LED drivers # CONFIG_LEDS_GPIO=y +CONFIG_LEDS_GPIO_PLATFORM=y # # LED Triggers @@ -882,6 +887,7 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_BACKLIGHT=y +# CONFIG_LEDS_TRIGGER_GPIO is not set CONFIG_LEDS_TRIGGER_DEFAULT_ON=y # @@ -951,6 +957,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -989,8 +1000,13 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -1033,6 +1049,9 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1076,6 +1095,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1083,11 +1103,16 @@ CONFIG_TRACING=y # CONFIG_FUNCTION_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1111,6 +1136,7 @@ CONFIG_DUMP_CODE=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/rts7751r2d1_defconfig b/arch/sh/configs/rts7751r2d1_defconfig index c0f741a..a4a59f6 100644 --- a/arch/sh/configs/rts7751r2d1_defconfig +++ b/arch/sh/configs/rts7751r2d1_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:33:25 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:58:13 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -70,6 +71,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -99,6 +101,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -110,7 +114,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -156,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -165,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -302,8 +304,6 @@ CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 root=/dev/sda1 earlyprintk=se # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -513,6 +513,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -672,6 +673,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -695,6 +697,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -793,6 +796,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # # Non-8250 serial port support # +# CONFIG_SERIAL_MAX3100 is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=1 CONFIG_SERIAL_SH_SCI_CONSOLE=y @@ -1079,15 +1083,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1291,6 +1297,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1337,6 +1348,7 @@ CONFIG_MINIX_FS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -1417,11 +1429,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1524,6 +1550,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index 8feef62..2bc1c97 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:34:12 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:59:01 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -70,6 +71,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -99,6 +101,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -110,7 +114,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -156,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -165,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -302,8 +304,6 @@ CONFIG_CMDLINE="console=tty0 console=ttySC0,115200 root=/dev/sda1 earlyprintk=se # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -513,6 +513,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -672,6 +673,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -695,6 +697,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -793,6 +796,7 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # # Non-8250 serial port support # +# CONFIG_SERIAL_MAX3100 is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=1 CONFIG_SERIAL_SH_SCI_CONSOLE=y @@ -1079,15 +1083,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1291,6 +1297,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1337,6 +1348,7 @@ CONFIG_MINIX_FS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -1417,11 +1429,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1524,6 +1550,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 739e229..a629b79 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:34:43 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 12:59:32 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -41,6 +42,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -73,6 +75,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -93,6 +96,7 @@ CONFIG_COMPAT_BRK=y CONFIG_SLUB=y # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -100,6 +104,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -112,7 +118,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y CONFIG_LBD=y -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -159,6 +164,7 @@ CONFIG_CPU_SH4A=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set CONFIG_CPU_SUBTYPE_SH7780=y @@ -168,8 +174,6 @@ CONFIG_CPU_SUBTYPE_SH7780=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -310,8 +314,6 @@ CONFIG_CMDLINE="mem=128M console=tty0 console=ttySC0,115200 ip=bootp root=/dev/n # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set # CONFIG_PCI_LEGACY is not set @@ -646,6 +648,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -1091,15 +1094,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1257,6 +1262,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y # CONFIG_EXT2_FS_SECURITY is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y # CONFIG_EXT3_FS_SECURITY is not set @@ -1281,6 +1287,11 @@ CONFIG_AUTOFS4_FS=y CONFIG_GENERIC_ACL=y # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=y @@ -1331,6 +1342,7 @@ CONFIG_MINIX_FS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1418,6 +1430,9 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 # CONFIG_SCHED_DEBUG is not set # CONFIG_SCHEDSTATS is not set CONFIG_TIMER_STATS=y @@ -1455,6 +1470,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1464,9 +1480,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1580,6 +1601,7 @@ CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index d30e0a7..5caf85a 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:39:37 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:01:02 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -40,6 +41,7 @@ CONFIG_LOCALVERSION_AUTO=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -63,6 +65,7 @@ CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_NS=y # CONFIG_CGROUP_FREEZER is not set CONFIG_CGROUP_DEVICE=y +# CONFIG_CPUSETS is not set CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_MEM_RES_CTLR=y @@ -80,7 +83,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -90,6 +92,7 @@ CONFIG_EMBEDDED=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -116,6 +119,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 @@ -127,7 +132,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -174,6 +178,7 @@ CONFIG_CPU_SUBTYPE_SH7206=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -183,8 +188,6 @@ CONFIG_CPU_SUBTYPE_SH7206=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -746,6 +749,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -785,8 +793,13 @@ CONFIG_CRAMFS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -831,6 +844,9 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -869,6 +885,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -876,11 +893,16 @@ CONFIG_TRACING=y # CONFIG_FUNCTION_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -991,6 +1013,7 @@ CONFIG_CRYPTO_LZO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/se7343_defconfig b/arch/sh/configs/se7343_defconfig index fbb72d0..004d531 100644 --- a/arch/sh/configs/se7343_defconfig +++ b/arch/sh/configs/se7343_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:42:00 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:01:44 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -40,6 +41,7 @@ CONFIG_LOCALVERSION_AUTO=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set @@ -73,6 +75,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -91,6 +94,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -98,6 +102,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_BASE_SMALL=0 @@ -109,7 +115,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -158,6 +163,7 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -167,8 +173,6 @@ CONFIG_ARCH_SHMOBILE=y CONFIG_CPU_SUBTYPE_SH7343=y # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -769,6 +773,7 @@ CONFIG_VIDEO_HELPER_CHIPS_AUTO=y # CONFIG_SOC_CAMERA is not set CONFIG_V4L_USB_DRIVERS=y # CONFIG_USB_VIDEO_CLASS is not set +CONFIG_USB_VIDEO_CLASS_INPUT_EVDEV=y CONFIG_USB_GSPCA=m # CONFIG_USB_M5602 is not set # CONFIG_USB_STV06XX is not set @@ -800,6 +805,7 @@ CONFIG_USB_GSPCA=m # CONFIG_VIDEO_PVRUSB2 is not set # CONFIG_VIDEO_HDPVR is not set # CONFIG_VIDEO_EM28XX is not set +# CONFIG_VIDEO_CX231XX is not set # CONFIG_VIDEO_USBVISION is not set # CONFIG_USB_VICAM is not set # CONFIG_USB_IBMCAM is not set @@ -813,6 +819,7 @@ CONFIG_USB_GSPCA=m # CONFIG_USB_STV680 is not set # CONFIG_USB_ZC0301 is not set # CONFIG_USB_PWC is not set +CONFIG_USB_PWC_INPUT_EVDEV=y # CONFIG_USB_ZR364XX is not set # CONFIG_USB_STKWEBCAM is not set # CONFIG_USB_S2255 is not set @@ -914,15 +921,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1050,6 +1059,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1071,6 +1081,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1125,6 +1140,7 @@ CONFIG_CRAMFS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1174,10 +1190,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1279,6 +1308,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7619_defconfig b/arch/sh/configs/se7619_defconfig index 125304e..edbece5 100644 --- a/arch/sh/configs/se7619_defconfig +++ b/arch/sh/configs/se7619_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:44:53 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:02:32 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -61,6 +62,7 @@ CONFIG_EMBEDDED=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -78,11 +80,14 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_BASE_SMALL=1 @@ -134,6 +139,7 @@ CONFIG_CPU_SUBTYPE_SH7619=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -143,8 +149,6 @@ CONFIG_CPU_SUBTYPE_SH7619=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -513,7 +517,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_OHCI is not set @@ -564,6 +567,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -601,8 +609,13 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set # # Partition Types @@ -630,10 +643,21 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -647,6 +671,7 @@ CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7705_defconfig b/arch/sh/configs/se7705_defconfig index 0308abf..bae161c 100644 --- a/arch/sh/configs/se7705_defconfig +++ b/arch/sh/configs/se7705_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:45:56 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:02:52 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -62,7 +63,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -70,6 +70,7 @@ CONFIG_EMBEDDED=y CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -88,12 +89,15 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -150,6 +154,7 @@ CONFIG_CPU_SUBTYPE_SH7705=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -159,8 +164,6 @@ CONFIG_CPU_SUBTYPE_SH7705=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -698,7 +701,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_OHCI is not set @@ -752,6 +754,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -804,6 +811,7 @@ CONFIG_JFFS2_RTIME=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -847,10 +855,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -949,6 +970,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7712_defconfig b/arch/sh/configs/se7712_defconfig index a8c24b7..330043f 100644 --- a/arch/sh/configs/se7712_defconfig +++ b/arch/sh/configs/se7712_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:48:18 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:03:27 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_FIND_NEXT_BIT=y @@ -38,6 +39,7 @@ CONFIG_LOCALVERSION="" CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -69,6 +71,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y # CONFIG_BUG is not set @@ -87,6 +90,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -94,6 +98,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -105,7 +111,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -151,6 +156,7 @@ CONFIG_CPU_SUBTYPE_SH7712=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -160,8 +166,6 @@ CONFIG_CPU_SUBTYPE_SH7712=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -585,6 +589,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -812,6 +817,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -833,6 +839,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -887,6 +898,7 @@ CONFIG_CRAMFS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -927,6 +939,7 @@ CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set # CONFIG_DETECT_SOFTLOCKUP is not set +# CONFIG_DETECT_HUNG_TASK is not set CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -961,6 +974,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -969,9 +983,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1090,6 +1109,7 @@ CONFIG_CRYPTO_DEFLATE=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7721_defconfig b/arch/sh/configs/se7721_defconfig index 4b79c25..5647891 100644 --- a/arch/sh/configs/se7721_defconfig +++ b/arch/sh/configs/se7721_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:51:44 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:04:19 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_FIND_NEXT_BIT=y @@ -38,6 +39,7 @@ CONFIG_LOCALVERSION="" CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -73,6 +75,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y # CONFIG_BUG is not set @@ -91,6 +94,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -98,6 +102,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -109,7 +115,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -155,6 +160,7 @@ CONFIG_CPU_SUBTYPE_SH7721=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -164,8 +170,6 @@ CONFIG_CPU_SUBTYPE_SH7721=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -776,15 +780,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -943,6 +949,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT2_FS_SECURITY=y # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -964,6 +971,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1021,6 +1033,7 @@ CONFIG_CRAMFS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set # CONFIG_NETWORK_FILESYSTEMS is not set # @@ -1085,6 +1098,7 @@ CONFIG_FRAME_WARN=1024 CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set # CONFIG_DETECT_SOFTLOCKUP is not set +# CONFIG_DETECT_HUNG_TASK is not set CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1119,6 +1133,7 @@ CONFIG_FRAME_POINTER=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1127,9 +1142,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1248,6 +1268,7 @@ CONFIG_CRYPTO_DEFLATE=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7722_defconfig b/arch/sh/configs/se7722_defconfig index 82bdaac..726fdbd 100644 --- a/arch/sh/configs/se7722_defconfig +++ b/arch/sh/configs/se7722_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:55:10 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:05:29 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -69,7 +70,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -78,6 +78,7 @@ CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -97,6 +98,7 @@ CONFIG_COMPAT_BRK=y CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +# CONFIG_MARKERS is not set # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set @@ -105,6 +107,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -117,7 +121,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -167,6 +170,7 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -176,8 +180,6 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7343 is not set CONFIG_CPU_SUBTYPE_SH7722=y # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -486,6 +488,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -692,7 +695,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_OHCI is not set @@ -766,6 +768,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -789,6 +792,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -832,6 +840,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -872,11 +881,25 @@ CONFIG_DEBUG_FS=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y CONFIG_SH_STANDARD_BIOS=y @@ -977,6 +1000,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7750_defconfig b/arch/sh/configs/se7750_defconfig index ceef6d9..ed1a123 100644 --- a/arch/sh/configs/se7750_defconfig +++ b/arch/sh/configs/se7750_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:57:31 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:06:02 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -70,6 +71,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -88,6 +90,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -95,6 +98,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -106,7 +111,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -152,6 +156,7 @@ CONFIG_CPU_SUBTYPE_SH7750=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -161,8 +166,6 @@ CONFIG_CPU_SUBTYPE_SH7750=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -553,6 +556,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -775,6 +779,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -829,6 +838,7 @@ CONFIG_JFFS2_RTIME=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -886,10 +896,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -989,6 +1012,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7751_defconfig b/arch/sh/configs/se7751_defconfig index 67fc26b..55f3b78 100644 --- a/arch/sh/configs/se7751_defconfig +++ b/arch/sh/configs/se7751_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 18:59:59 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:06:44 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -65,7 +66,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -74,6 +74,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -92,6 +93,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -99,6 +101,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -110,7 +114,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -156,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7751=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -165,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7751=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -742,6 +744,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -796,6 +803,7 @@ CONFIG_JFFS2_RTIME=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -833,10 +841,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -936,6 +957,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/se7780_defconfig b/arch/sh/configs/se7780_defconfig index ebce23c..c4f0af3 100644 --- a/arch/sh/configs/se7780_defconfig +++ b/arch/sh/configs/se7780_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:02:05 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:07:14 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -67,6 +68,7 @@ CONFIG_EMBEDDED=y CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y # CONFIG_KALLSYMS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -86,12 +88,15 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -103,7 +108,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_INTEGRITY is not set # @@ -149,6 +153,7 @@ CONFIG_CPU_SH4A=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set CONFIG_CPU_SUBTYPE_SH7780=y @@ -158,8 +163,6 @@ CONFIG_CPU_SUBTYPE_SH7780=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -285,8 +288,6 @@ CONFIG_CMDLINE="console=ttySC0.115200 root=/dev/sda1" # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -558,6 +559,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -957,15 +959,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1122,6 +1126,10 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1245,11 +1253,24 @@ CONFIG_DEBUG_FS=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1345,6 +1366,7 @@ CONFIG_CRYPTO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index 6fcdb09..f9c6e51 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:04:59 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:07:56 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -41,6 +42,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -67,7 +69,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -76,6 +77,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -105,6 +107,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -117,7 +121,6 @@ CONFIG_MODVERSIONS=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -163,6 +166,7 @@ CONFIG_CPU_SUBTYPE_SH7751=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -172,8 +176,6 @@ CONFIG_CPU_SUBTYPE_SH7751=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -300,8 +302,6 @@ CONFIG_CMDLINE="console=ttySC1,115200 mem=64M root=/dev/nfs" # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -562,6 +562,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -656,6 +657,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -679,6 +681,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -887,7 +890,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -929,6 +931,7 @@ CONFIG_EXT2_FS_XATTR=y # CONFIG_EXT2_FS_SECURITY is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y # CONFIG_EXT3_FS_SECURITY is not set @@ -952,6 +955,11 @@ CONFIG_AUTOFS4_FS=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=m @@ -1001,6 +1009,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1112,11 +1121,26 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y CONFIG_SH_STANDARD_BIOS=y @@ -1228,6 +1252,7 @@ CONFIG_CRYPTO_DEFLATE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/sh7710voipgw_defconfig b/arch/sh/configs/sh7710voipgw_defconfig index 1ab37c0..48b5809 100644 --- a/arch/sh/configs/sh7710voipgw_defconfig +++ b/arch/sh/configs/sh7710voipgw_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:09:01 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:09:16 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -39,6 +40,7 @@ CONFIG_LOCALVERSION_AUTO=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set @@ -72,6 +74,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -90,6 +93,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -97,6 +101,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_BASE_SMALL=0 @@ -108,7 +114,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -154,6 +159,7 @@ CONFIG_CPU_SUBTYPE_SH7710=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -163,8 +169,6 @@ CONFIG_CPU_SUBTYPE_SH7710=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -728,7 +732,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB_ARCH_HAS_OHCI is not set @@ -780,6 +783,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -834,6 +842,7 @@ CONFIG_JFFS2_RTIME=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -871,11 +880,24 @@ CONFIG_DEBUG_FS=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -975,6 +997,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/sh7724_generic_defconfig b/arch/sh/configs/sh7724_generic_defconfig index 268d04e..ec8f18c 100644 --- a/arch/sh/configs/sh7724_generic_defconfig +++ b/arch/sh/configs/sh7724_generic_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.30-rc2 -# Thu Apr 16 15:42:20 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:09:47 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig index c79bb84..f77bc79 100644 --- a/arch/sh/configs/sh7763rdp_defconfig +++ b/arch/sh/configs/sh7763rdp_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:10:57 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:10:12 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -78,6 +79,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -106,6 +108,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -117,7 +121,6 @@ CONFIG_MODULES=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -164,6 +167,7 @@ CONFIG_CPU_SH4A=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set CONFIG_CPU_SUBTYPE_SH7763=y # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -173,8 +177,6 @@ CONFIG_CPU_SUBTYPE_SH7763=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -555,6 +557,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -941,6 +944,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -965,6 +969,11 @@ CONFIG_AUTOFS4_FS=y CONFIG_GENERIC_ACL=y # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1012,6 +1021,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -1100,11 +1110,25 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1204,6 +1228,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index a6cf4505..1c55b80 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:12:18 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:10:53 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -79,6 +80,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -98,6 +100,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set CONFIG_PROFILING=y +# CONFIG_MARKERS is not set # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set @@ -106,6 +109,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -118,7 +123,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -166,6 +170,7 @@ CONFIG_CPU_SHX2=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -175,8 +180,6 @@ CONFIG_CPU_SUBTYPE_SH7785=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -310,8 +313,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000 # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -672,6 +673,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -1009,15 +1011,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1218,6 +1222,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1240,6 +1245,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1289,6 +1299,7 @@ CONFIG_MINIX_FS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1377,6 +1388,9 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1413,6 +1427,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1422,9 +1437,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1542,6 +1562,7 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/sh7785lcr_defconfig b/arch/sh/configs/sh7785lcr_defconfig index e4fac2e..4385fe9 100644 --- a/arch/sh/configs/sh7785lcr_defconfig +++ b/arch/sh/configs/sh7785lcr_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.30-rc2 -# Wed Apr 22 19:17:56 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:11:48 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y @@ -307,8 +307,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000 # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y diff --git a/arch/sh/configs/shmin_defconfig b/arch/sh/configs/shmin_defconfig index d695e90..4e12025 100644 --- a/arch/sh/configs/shmin_defconfig +++ b/arch/sh/configs/shmin_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:19:03 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:12:41 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_FIND_NEXT_BIT=y @@ -63,6 +64,7 @@ CONFIG_EMBEDDED=y # CONFIG_UID16 is not set # CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y # CONFIG_BUG is not set @@ -81,12 +83,15 @@ CONFIG_COMPAT_BRK=y # CONFIG_SLUB is not set CONFIG_SLOB=y # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_BASE_SMALL=1 # CONFIG_MODULES is not set @@ -137,6 +142,7 @@ CONFIG_CPU_SUBTYPE_SH7706=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -146,8 +152,6 @@ CONFIG_CPU_SUBTYPE_SH7706=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -675,6 +679,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -718,6 +727,7 @@ CONFIG_CRAMFS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -762,10 +772,22 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y CONFIG_SH_STANDARD_BIOS=y @@ -864,6 +886,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index e3651f5..c088144 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:20:54 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:13:12 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -42,6 +43,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y # CONFIG_BSD_PROCESS_ACCT_V3 is not set # CONFIG_TASKSTATS is not set @@ -96,6 +98,7 @@ CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -126,6 +129,8 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_USE_GENERIC_SMP_HELPERS=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 @@ -138,7 +143,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -186,6 +190,7 @@ CONFIG_CPU_SHX3=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -195,8 +200,6 @@ CONFIG_CPU_SUBTYPE_SHX3=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -553,6 +556,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -646,6 +650,7 @@ CONFIG_DEVKMEM=y # # Non-8250 serial port support # +# CONFIG_SERIAL_MAX3100 is not set CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=2 CONFIG_SERIAL_SH_SCI_CONSOLE=y @@ -985,6 +990,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1008,6 +1014,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1051,6 +1062,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -1085,6 +1097,9 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_SOFTLOCKUP=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1124,6 +1139,7 @@ CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_RING_BUFFER=y CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1133,11 +1149,16 @@ CONFIG_TRACING=y # CONFIG_PREEMPT_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_FTRACE_STARTUP_TEST is not set # CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1245,6 +1266,7 @@ CONFIG_CRYPTO=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +CONFIG_BINARY_PRINTF=y # # Library routines diff --git a/arch/sh/configs/snapgear_defconfig b/arch/sh/configs/snapgear_defconfig index 6960f60..54a7a3c 100644 --- a/arch/sh/configs/snapgear_defconfig +++ b/arch/sh/configs/snapgear_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:21:39 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:14:00 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -64,7 +65,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -73,6 +73,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -92,12 +93,15 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -105,7 +109,6 @@ CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -151,6 +154,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -160,8 +164,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -295,8 +297,6 @@ CONFIG_BOOT_LINK_OFFSET=0x00800000 # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -763,6 +763,11 @@ CONFIG_FILE_LOCKING=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -805,8 +810,13 @@ CONFIG_CRAMFS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y # CONFIG_NFS_FS is not set # CONFIG_NFSD is not set @@ -844,10 +854,23 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -862,6 +885,7 @@ CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/systemh_defconfig b/arch/sh/configs/systemh_defconfig index 7ea639b..dbe7e54 100644 --- a/arch/sh/configs/systemh_defconfig +++ b/arch/sh/configs/systemh_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:23:31 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:14:33 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -61,7 +62,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -70,6 +70,7 @@ CONFIG_UID16=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_HOTPLUG is not set CONFIG_PRINTK=y CONFIG_BUG=y @@ -88,6 +89,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -95,6 +97,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -107,7 +111,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -153,6 +156,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -162,8 +166,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -506,6 +508,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -547,8 +554,13 @@ CONFIG_CRAMFS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set # # Partition Types @@ -577,10 +589,24 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -595,6 +621,7 @@ CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set # CONFIG_CRYPTO is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index bbeb4c6..8ca94ef 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:24:55 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:14:55 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -40,6 +41,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set @@ -66,7 +68,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -76,6 +77,7 @@ CONFIG_UID16=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -95,6 +97,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_IOREMAP_PROT=y @@ -102,6 +105,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -114,7 +119,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -160,6 +164,7 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -169,8 +174,6 @@ CONFIG_CPU_SUBTYPE_SH7751R=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -305,8 +308,6 @@ CONFIG_CMDLINE="console=ttySC1,38400N81 root=/dev/nfs ip=:::::eth1:autoconf rw" # CONFIG_PCI=y CONFIG_SH_PCIDMA_NONCOHERENT=y -CONFIG_PCI_AUTO=y -CONFIG_PCI_AUTO_UPDATE_RESOURCES=y # CONFIG_PCIEPORTBUS is not set # CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCI_LEGACY=y @@ -789,6 +790,7 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -906,6 +908,7 @@ CONFIG_NETDEV_1000=y # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -929,6 +932,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set @@ -1182,7 +1186,6 @@ CONFIG_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1393,6 +1396,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set # CONFIG_EXT4_FS is not set CONFIG_JBD=y @@ -1419,6 +1423,11 @@ CONFIG_INOTIFY_USER=y CONFIG_FUSE_FS=m # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # CONFIG_ISO9660_FS=m @@ -1467,8 +1476,13 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set CONFIG_ROMFS_FS=y +CONFIG_ROMFS_BACKED_BY_BLOCK=y +# CONFIG_ROMFS_BACKED_BY_MTD is not set +# CONFIG_ROMFS_BACKED_BY_BOTH is not set +CONFIG_ROMFS_ON_BLOCK=y # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1576,6 +1590,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set # CONFIG_DETECT_SOFTLOCKUP is not set +# CONFIG_DETECT_HUNG_TASK is not set CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1610,6 +1625,7 @@ CONFIG_SCHED_DEBUG=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers @@ -1618,9 +1634,14 @@ CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y # CONFIG_IRQSOFF_TRACER is not set # CONFIG_SCHED_TRACER is not set # CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set # CONFIG_BOOT_TRACER is not set # CONFIG_TRACE_BRANCH_PROFILING is not set # CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set @@ -1741,6 +1762,7 @@ CONFIG_CRYPTO_DEFLATE=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/ul2_defconfig b/arch/sh/configs/ul2_defconfig index 34f5192..bfb4d98 100644 --- a/arch/sh/configs/ul2_defconfig +++ b/arch/sh/configs/ul2_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:30:27 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 13:17:05 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -69,7 +70,6 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y # CONFIG_RD_BZIP2 is not set # CONFIG_RD_LZMA is not set -CONFIG_INITRAMFS_COMPRESSION_NONE=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -78,6 +78,7 @@ CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -97,6 +98,7 @@ CONFIG_COMPAT_BRK=y CONFIG_SLUB=y # CONFIG_SLOB is not set CONFIG_PROFILING=y +# CONFIG_MARKERS is not set # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set @@ -105,6 +107,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -117,7 +121,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -167,6 +170,7 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -176,8 +180,6 @@ CONFIG_ARCH_SHMOBILE=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set CONFIG_CPU_SUBTYPE_SH7366=y -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -584,6 +586,7 @@ CONFIG_SCSI_WAIT_SCAN=m CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set # CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_DH is not set # CONFIG_SCSI_OSD_INITIATOR is not set @@ -936,6 +939,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -958,6 +962,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1005,6 +1014,7 @@ CONFIG_CRAMFS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set @@ -1095,10 +1105,24 @@ CONFIG_FRAME_WARN=1024 CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1208,6 +1232,7 @@ CONFIG_CRYPTO_ARC4=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index d174b1a..512664f 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -1,10 +1,11 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29 -# Thu Apr 2 19:33:39 2009 +# Linux kernel version: 2.6.30-rc3 +# Mon Apr 27 14:02:55 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y +# CONFIG_SUPERH64 is not set CONFIG_ARCH_DEFCONFIG="arch/sh/configs/shx3_defconfig" CONFIG_RWSEM_GENERIC_SPINLOCK=y CONFIG_GENERIC_BUG=y @@ -76,6 +77,7 @@ CONFIG_UID16=y CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +# CONFIG_STRIP_ASM_SYMS is not set CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y @@ -94,6 +96,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set CONFIG_PROFILING=y +# CONFIG_MARKERS is not set # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set @@ -102,6 +105,8 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_CLK=y +CONFIG_HAVE_DMA_API_DEBUG=y +# CONFIG_SLOW_WORK is not set CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -114,7 +119,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_SRCVERSION_ALL is not set CONFIG_BLOCK=y # CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_BLK_DEV_INTEGRITY is not set @@ -162,6 +166,7 @@ CONFIG_CPU_SHX3=y # CONFIG_CPU_SUBTYPE_SH7760 is not set # CONFIG_CPU_SUBTYPE_SH4_202 is not set # CONFIG_CPU_SUBTYPE_SH7723 is not set +# CONFIG_CPU_SUBTYPE_SH7724 is not set # CONFIG_CPU_SUBTYPE_SH7763 is not set # CONFIG_CPU_SUBTYPE_SH7770 is not set # CONFIG_CPU_SUBTYPE_SH7780 is not set @@ -171,8 +176,6 @@ CONFIG_CPU_SUBTYPE_SH7786=y # CONFIG_CPU_SUBTYPE_SH7343 is not set # CONFIG_CPU_SUBTYPE_SH7722 is not set # CONFIG_CPU_SUBTYPE_SH7366 is not set -# CONFIG_CPU_SUBTYPE_SH5_101 is not set -# CONFIG_CPU_SUBTYPE_SH5_103 is not set # # Memory management options @@ -900,15 +903,17 @@ CONFIG_USB_HID=y # # Special HID drivers # -CONFIG_HID_COMPAT=y CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y CONFIG_HID_CHERRY=y CONFIG_HID_CHICONY=y CONFIG_HID_CYPRESS=y +# CONFIG_DRAGONRISE_FF is not set CONFIG_HID_EZKEY=y +# CONFIG_HID_KYE is not set CONFIG_HID_GYRATION=y +# CONFIG_HID_KENSINGTON is not set CONFIG_HID_LOGITECH=y # CONFIG_LOGITECH_FF is not set # CONFIG_LOGIRUMBLEPAD2_FF is not set @@ -1046,6 +1051,7 @@ CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set # CONFIG_EXT2_FS_XIP is not set CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_XATTR=y # CONFIG_EXT3_FS_POSIX_ACL is not set # CONFIG_EXT3_FS_SECURITY is not set @@ -1068,6 +1074,11 @@ CONFIG_INOTIFY_USER=y # CONFIG_FUSE_FS is not set # +# Caches +# +# CONFIG_FSCACHE is not set + +# # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set @@ -1117,6 +1128,7 @@ CONFIG_MINIX_FS=y # CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set +# CONFIG_NILFS2_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y @@ -1209,10 +1221,24 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y # # Tracers # +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_EVENT_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_DMA_API_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_SH_STANDARD_BIOS is not set @@ -1322,6 +1348,7 @@ CONFIG_CRYPTO_DES=y # # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines -- cgit v0.10.2 From 5be7c0a4d3dfe25091f2e4e524103e81d9e7e180 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 27 Apr 2009 14:40:47 +0900 Subject: sh: select GENERIC_TIME for new CMT driver. Signed-off-by: Paul Mundt diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 4c68fde..1647489 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -476,10 +476,11 @@ config SH_TIMER_CMT bool "CMT clockevents driver" depends on SYS_SUPPORTS_CMT && !SH_CMT select GENERIC_CLOCKEVENTS + select GENERIC_TIME config SH_MTU2 bool "MTU2 timer support" - depends on CPU_SH2A + depends on CPU_SH2A && !GENERIC_TIME default y help This enables the use of the MTU2 as the system timer. -- cgit v0.10.2 From 148be2c15d4a866fbc7a8f55342e4fd4de73be61 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2009 08:02:14 +0200 Subject: perf_counter tools: move helper library to util/* Clean up the top level directory a bit by moving all the helper libraries to util/*.[ch]. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 690045e..543ccf2 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -147,7 +147,7 @@ all:: # broken, or spawning external process is slower than built-in grep perf has). PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - @$(SHELL_PATH) ./PERF-VERSION-GEN + @$(SHELL_PATH) util/PERF-VERSION-GEN -include PERF-VERSION-FILE uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') @@ -287,26 +287,28 @@ export PERL_PATH LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h -LIB_H += levenshtein.h -LIB_H += parse-options.h -LIB_H += quote.h -LIB_H += strbuf.h -LIB_H += run-command.h - -LIB_OBJS += abspath.o -LIB_OBJS += alias.o -LIB_OBJS += config.o -LIB_OBJS += ctype.o -LIB_OBJS += exec_cmd.o -LIB_OBJS += help.o -LIB_OBJS += levenshtein.o -LIB_OBJS += parse-options.o -LIB_OBJS += path.o -LIB_OBJS += run-command.o -LIB_OBJS += quote.o -LIB_OBJS += strbuf.o -LIB_OBJS += usage.o -LIB_OBJS += wrapper.o +LIB_H += util/levenshtein.h +LIB_H += util/parse-options.h +LIB_H += util/quote.h +LIB_H += util/util.h +LIB_H += util/help.h +LIB_H += util/strbuf.h +LIB_H += util/run-command.h + +LIB_OBJS += util/abspath.o +LIB_OBJS += util/alias.o +LIB_OBJS += util/config.o +LIB_OBJS += util/ctype.o +LIB_OBJS += util/exec_cmd.o +LIB_OBJS += util/help.o +LIB_OBJS += util/levenshtein.o +LIB_OBJS += util/parse-options.o +LIB_OBJS += util/path.o +LIB_OBJS += util/run-command.o +LIB_OBJS += util/quote.o +LIB_OBJS += util/strbuf.o +LIB_OBJS += util/usage.o +LIB_OBJS += util/wrapper.o BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-record.o @@ -620,10 +622,10 @@ $(BUILT_INS): perf$X ln -s perf$X $@ 2>/dev/null || \ cp perf$X $@ -common-cmds.h: ./generate-cmdlist.sh command-list.txt +common-cmds.h: util/generate-cmdlist.sh command-list.txt common-cmds.h: $(wildcard Documentation/perf-*.txt) - $(QUIET_GEN)./generate-cmdlist.sh > $@+ && mv $@+ $@ + $(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh $(QUIET_GEN)$(RM) $@ $@+ && \ @@ -656,7 +658,7 @@ perf.o perf.spec \ %.o: %.S $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< -exec_cmd.o: exec_cmd.c PERF-CFLAGS +util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ '-DBINDIR="$(bindir_relative_SQ)"' \ @@ -666,7 +668,7 @@ exec_cmd.o: exec_cmd.c PERF-CFLAGS builtin-init-db.o: builtin-init-db.c PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $< -config.o: config.c PERF-CFLAGS +util/config.o: util/config.c PERF-CFLAGS $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< perf-%$X: %.o $(PERFLIBS) @@ -729,7 +731,7 @@ export NO_SVN_TESTS check: common-cmds.h if sparse; \ then \ - for i in *.c; \ + for i in *.c */*.c; \ do \ sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ done; \ @@ -795,7 +797,7 @@ distclean: clean $(RM) configure clean: - $(RM) *.o $(LIB_FILE) + $(RM) *.o */*.o $(LIB_FILE) $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X $(RM) $(TEST_PROGRAMS) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* @@ -836,7 +838,7 @@ coverage-build: coverage-clean -j1 test coverage-report: - gcov -b *.c - grep '^function.*called 0 ' *.c.gcov \ + gcov -b *.c */*.c + grep '^function.*called 0 ' *.c.gcov */*.c.gcov \ | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \ | tee coverage-untested-functions diff --git a/Documentation/perf_counter/PERF-VERSION-GEN b/Documentation/perf_counter/PERF-VERSION-GEN deleted file mode 100755 index c561d15..0000000 --- a/Documentation/perf_counter/PERF-VERSION-GEN +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/sh - -GVF=PERF-VERSION-FILE -DEF_VER=v0.0.1.PERF - -LF=' -' - -# First see if there is a version file (included in release tarballs), -# then try git-describe, then default. -if test -f version -then - VN=$(cat version) || VN="$DEF_VER" -elif test -d .git -o -f .git && - VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && - case "$VN" in - *$LF*) (exit 1) ;; - v[0-9]*) - git update-index -q --refresh - test -z "$(git diff-index --name-only HEAD --)" || - VN="$VN-dirty" ;; - esac -then - VN=$(echo "$VN" | sed -e 's/-/./g'); -else - VN="$DEF_VER" -fi - -VN=$(expr "$VN" : v*'\(.*\)') - -if test -r $GVF -then - VC=$(sed -e 's/^PERF_VERSION = //' <$GVF) -else - VC=unset -fi -test "$VN" = "$VC" || { - echo >&2 "PERF_VERSION = $VN" - echo "PERF_VERSION = $VN" >$GVF -} - - diff --git a/Documentation/perf_counter/abspath.c b/Documentation/perf_counter/abspath.c deleted file mode 100644 index 649f34f..0000000 --- a/Documentation/perf_counter/abspath.c +++ /dev/null @@ -1,117 +0,0 @@ -#include "cache.h" - -/* - * Do not use this for inspecting *tracked* content. When path is a - * symlink to a directory, we do not want to say it is a directory when - * dealing with tracked content in the working tree. - */ -int is_directory(const char *path) -{ - struct stat st; - return (!stat(path, &st) && S_ISDIR(st.st_mode)); -} - -/* We allow "recursive" symbolic links. Only within reason, though. */ -#define MAXDEPTH 5 - -const char *make_absolute_path(const char *path) -{ - static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1]; - char cwd[1024] = ""; - int buf_index = 1, len; - - int depth = MAXDEPTH; - char *last_elem = NULL; - struct stat st; - - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) - die ("Too long path: %.*s", 60, path); - - while (depth--) { - if (!is_directory(buf)) { - char *last_slash = strrchr(buf, '/'); - if (last_slash) { - *last_slash = '\0'; - last_elem = xstrdup(last_slash + 1); - } else { - last_elem = xstrdup(buf); - *buf = '\0'; - } - } - - if (*buf) { - if (!*cwd && !getcwd(cwd, sizeof(cwd))) - die ("Could not get current working directory"); - - if (chdir(buf)) - die ("Could not switch to '%s'", buf); - } - if (!getcwd(buf, PATH_MAX)) - die ("Could not get current working directory"); - - if (last_elem) { - int len = strlen(buf); - if (len + strlen(last_elem) + 2 > PATH_MAX) - die ("Too long path name: '%s/%s'", - buf, last_elem); - buf[len] = '/'; - strcpy(buf + len + 1, last_elem); - free(last_elem); - last_elem = NULL; - } - - if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) { - len = readlink(buf, next_buf, PATH_MAX); - if (len < 0) - die ("Invalid symlink: %s", buf); - if (PATH_MAX <= len) - die("symbolic link too long: %s", buf); - next_buf[len] = '\0'; - buf = next_buf; - buf_index = 1 - buf_index; - next_buf = bufs[buf_index]; - } else - break; - } - - if (*cwd && chdir(cwd)) - die ("Could not change back to '%s'", cwd); - - return buf; -} - -static const char *get_pwd_cwd(void) -{ - static char cwd[PATH_MAX + 1]; - char *pwd; - struct stat cwd_stat, pwd_stat; - if (getcwd(cwd, PATH_MAX) == NULL) - return NULL; - pwd = getenv("PWD"); - if (pwd && strcmp(pwd, cwd)) { - stat(cwd, &cwd_stat); - if (!stat(pwd, &pwd_stat) && - pwd_stat.st_dev == cwd_stat.st_dev && - pwd_stat.st_ino == cwd_stat.st_ino) { - strlcpy(cwd, pwd, PATH_MAX); - } - } - return cwd; -} - -const char *make_nonrelative_path(const char *path) -{ - static char buf[PATH_MAX + 1]; - - if (is_absolute_path(path)) { - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } else { - const char *cwd = get_pwd_cwd(); - if (!cwd) - die("Cannot determine the current working directory"); - if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } - return buf; -} diff --git a/Documentation/perf_counter/alias.c b/Documentation/perf_counter/alias.c deleted file mode 100644 index 9b3dd2b..0000000 --- a/Documentation/perf_counter/alias.c +++ /dev/null @@ -1,77 +0,0 @@ -#include "cache.h" - -static const char *alias_key; -static char *alias_val; - -static int alias_lookup_cb(const char *k, const char *v, void *cb) -{ - if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { - if (!v) - return config_error_nonbool(k); - alias_val = strdup(v); - return 0; - } - return 0; -} - -char *alias_lookup(const char *alias) -{ - alias_key = alias; - alias_val = NULL; - perf_config(alias_lookup_cb, NULL); - return alias_val; -} - -int split_cmdline(char *cmdline, const char ***argv) -{ - int src, dst, count = 0, size = 16; - char quoted = 0; - - *argv = malloc(sizeof(char*) * size); - - /* split alias_string */ - (*argv)[count++] = cmdline; - for (src = dst = 0; cmdline[src];) { - char c = cmdline[src]; - if (!quoted && isspace(c)) { - cmdline[dst++] = 0; - while (cmdline[++src] - && isspace(cmdline[src])) - ; /* skip */ - if (count >= size) { - size += 16; - *argv = realloc(*argv, sizeof(char*) * size); - } - (*argv)[count++] = cmdline + dst; - } else if (!quoted && (c == '\'' || c == '"')) { - quoted = c; - src++; - } else if (c == quoted) { - quoted = 0; - src++; - } else { - if (c == '\\' && quoted != '\'') { - src++; - c = cmdline[src]; - if (!c) { - free(*argv); - *argv = NULL; - return error("cmdline ends with \\"); - } - } - cmdline[dst++] = c; - src++; - } - } - - cmdline[dst] = 0; - - if (quoted) { - free(*argv); - *argv = NULL; - return error("unclosed quote"); - } - - return count; -} - diff --git a/Documentation/perf_counter/builtin-help.c b/Documentation/perf_counter/builtin-help.c index a136d61..6616de0 100644 --- a/Documentation/perf_counter/builtin-help.c +++ b/Documentation/perf_counter/builtin-help.c @@ -3,13 +3,13 @@ * * Builtin help command */ -#include "cache.h" +#include "util/cache.h" #include "builtin.h" -#include "exec_cmd.h" +#include "util/exec_cmd.h" #include "common-cmds.h" -#include "parse-options.h" -#include "run-command.h" -#include "help.h" +#include "util/parse-options.h" +#include "util/run-command.h" +#include "util/help.h" static struct man_viewer_list { struct man_viewer_list *next; diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c index 169a2d1..d7ace63 100644 --- a/Documentation/perf_counter/builtin-stat.c +++ b/Documentation/perf_counter/builtin-stat.c @@ -61,7 +61,7 @@ * Released under the GPL v2. (and only v2, not any later version) */ -#include "util.h" +#include "util/util.h" #include #include diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c index 98e8690..dea016f 100644 --- a/Documentation/perf_counter/builtin-top.c +++ b/Documentation/perf_counter/builtin-top.c @@ -42,7 +42,7 @@ * Released under the GPL v2. (and only v2, not any later version) */ -#include "util.h" +#include "util/util.h" #include #include diff --git a/Documentation/perf_counter/builtin.h b/Documentation/perf_counter/builtin.h index 800f86c..d32318a 100644 --- a/Documentation/perf_counter/builtin.h +++ b/Documentation/perf_counter/builtin.h @@ -1,8 +1,8 @@ #ifndef BUILTIN_H #define BUILTIN_H -#include "util.h" -#include "strbuf.h" +#include "util/util.h" +#include "util/strbuf.h" extern const char perf_version_string[]; extern const char perf_usage_string[]; diff --git a/Documentation/perf_counter/cache.h b/Documentation/perf_counter/cache.h deleted file mode 100644 index 7108051..0000000 --- a/Documentation/perf_counter/cache.h +++ /dev/null @@ -1,117 +0,0 @@ -#ifndef CACHE_H -#define CACHE_H - -#include "util.h" -#include "strbuf.h" - -#define PERF_DIR_ENVIRONMENT "PERF_DIR" -#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" -#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" -#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY" -#define INDEX_ENVIRONMENT "PERF_INDEX_FILE" -#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE" -#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR" -#define CONFIG_ENVIRONMENT "PERF_CONFIG" -#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" -#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES" -#define PERFATTRIBUTES_FILE ".perfattributes" -#define INFOATTRIBUTES_FILE "info/attributes" -#define ATTRIBUTE_MACRO_PREFIX "[attr]" - -typedef int (*config_fn_t)(const char *, const char *, void *); -extern int perf_default_config(const char *, const char *, void *); -extern int perf_config_from_file(config_fn_t fn, const char *, void *); -extern int perf_config(config_fn_t fn, void *); -extern int perf_parse_ulong(const char *, unsigned long *); -extern int perf_config_int(const char *, const char *); -extern unsigned long perf_config_ulong(const char *, const char *); -extern int perf_config_bool_or_int(const char *, const char *, int *); -extern int perf_config_bool(const char *, const char *); -extern int perf_config_string(const char **, const char *, const char *); -extern int perf_config_set(const char *, const char *); -extern int perf_config_set_multivar(const char *, const char *, const char *, int); -extern int perf_config_rename_section(const char *, const char *); -extern const char *perf_etc_perfconfig(void); -extern int check_repository_format_version(const char *var, const char *value, void *cb); -extern int perf_config_system(void); -extern int perf_config_global(void); -extern int config_error_nonbool(const char *); -extern const char *config_exclusive_filename; - -#define MAX_PERFNAME (1000) -extern char perf_default_email[MAX_PERFNAME]; -extern char perf_default_name[MAX_PERFNAME]; -extern int user_ident_explicitly_given; - -extern const char *perf_log_output_encoding; -extern const char *perf_mailmap_file; - -/* IO helper functions */ -extern void maybe_flush_or_die(FILE *, const char *); -extern int copy_fd(int ifd, int ofd); -extern int copy_file(const char *dst, const char *src, int mode); -extern ssize_t read_in_full(int fd, void *buf, size_t count); -extern ssize_t write_in_full(int fd, const void *buf, size_t count); -extern void write_or_die(int fd, const void *buf, size_t count); -extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); -extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); -extern void fsync_or_die(int fd, const char *); - -/* pager.c */ -extern void setup_pager(void); -extern const char *pager_program; -extern int pager_in_use(void); -extern int pager_use_color; - -extern const char *editor_program; -extern const char *excludes_file; - -char *alias_lookup(const char *alias); -int split_cmdline(char *cmdline, const char ***argv); - -#define alloc_nr(x) (((x)+16)*3/2) - -/* - * Realloc the buffer pointed at by variable 'x' so that it can hold - * at least 'nr' entries; the number of entries currently allocated - * is 'alloc', using the standard growing factor alloc_nr() macro. - * - * DO NOT USE any expression with side-effect for 'x' or 'alloc'. - */ -#define ALLOC_GROW(x, nr, alloc) \ - do { \ - if ((nr) > alloc) { \ - if (alloc_nr(alloc) < (nr)) \ - alloc = (nr); \ - else \ - alloc = alloc_nr(alloc); \ - x = xrealloc((x), alloc * sizeof(*(x))); \ - } \ - } while(0) - - -static inline int is_absolute_path(const char *path) -{ - return path[0] == '/'; -} - -const char *make_absolute_path(const char *path); -const char *make_nonrelative_path(const char *path); -const char *make_relative_path(const char *abs, const char *base); -int normalize_path_copy(char *dst, const char *src); -int longest_ancestor_length(const char *path, const char *prefix_list); -char *strip_path_suffix(const char *path, const char *suffix); - -extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); - -extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) - __attribute__((format (printf, 3, 4))); -extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...) - __attribute__((format (printf, 3, 4))); -extern char *perf_pathdup(const char *fmt, ...) - __attribute__((format (printf, 1, 2))); - -extern size_t strlcpy(char *dest, const char *src, size_t size); - -#endif /* CACHE_H */ diff --git a/Documentation/perf_counter/config.c b/Documentation/perf_counter/config.c deleted file mode 100644 index 3dd13fa..0000000 --- a/Documentation/perf_counter/config.c +++ /dev/null @@ -1,873 +0,0 @@ -/* - * GIT - The information manager from hell - * - * Copyright (C) Linus Torvalds, 2005 - * Copyright (C) Johannes Schindelin, 2005 - * - */ -#include "util.h" -#include "cache.h" -#include "exec_cmd.h" - -#define MAXNAME (256) - -static FILE *config_file; -static const char *config_file_name; -static int config_linenr; -static int config_file_eof; - -const char *config_exclusive_filename = NULL; - -static int get_next_char(void) -{ - int c; - FILE *f; - - c = '\n'; - if ((f = config_file) != NULL) { - c = fgetc(f); - if (c == '\r') { - /* DOS like systems */ - c = fgetc(f); - if (c != '\n') { - ungetc(c, f); - c = '\r'; - } - } - if (c == '\n') - config_linenr++; - if (c == EOF) { - config_file_eof = 1; - c = '\n'; - } - } - return c; -} - -static char *parse_value(void) -{ - static char value[1024]; - int quote = 0, comment = 0, len = 0, space = 0; - - for (;;) { - int c = get_next_char(); - if (len >= sizeof(value) - 1) - return NULL; - if (c == '\n') { - if (quote) - return NULL; - value[len] = 0; - return value; - } - if (comment) - continue; - if (isspace(c) && !quote) { - space = 1; - continue; - } - if (!quote) { - if (c == ';' || c == '#') { - comment = 1; - continue; - } - } - if (space) { - if (len) - value[len++] = ' '; - space = 0; - } - if (c == '\\') { - c = get_next_char(); - switch (c) { - case '\n': - continue; - case 't': - c = '\t'; - break; - case 'b': - c = '\b'; - break; - case 'n': - c = '\n'; - break; - /* Some characters escape as themselves */ - case '\\': case '"': - break; - /* Reject unknown escape sequences */ - default: - return NULL; - } - value[len++] = c; - continue; - } - if (c == '"') { - quote = 1-quote; - continue; - } - value[len++] = c; - } -} - -static inline int iskeychar(int c) -{ - return isalnum(c) || c == '-'; -} - -static int get_value(config_fn_t fn, void *data, char *name, unsigned int len) -{ - int c; - char *value; - - /* Get the full name */ - for (;;) { - c = get_next_char(); - if (config_file_eof) - break; - if (!iskeychar(c)) - break; - name[len++] = tolower(c); - if (len >= MAXNAME) - return -1; - } - name[len] = 0; - while (c == ' ' || c == '\t') - c = get_next_char(); - - value = NULL; - if (c != '\n') { - if (c != '=') - return -1; - value = parse_value(); - if (!value) - return -1; - } - return fn(name, value, data); -} - -static int get_extended_base_var(char *name, int baselen, int c) -{ - do { - if (c == '\n') - return -1; - c = get_next_char(); - } while (isspace(c)); - - /* We require the format to be '[base "extension"]' */ - if (c != '"') - return -1; - name[baselen++] = '.'; - - for (;;) { - int c = get_next_char(); - if (c == '\n') - return -1; - if (c == '"') - break; - if (c == '\\') { - c = get_next_char(); - if (c == '\n') - return -1; - } - name[baselen++] = c; - if (baselen > MAXNAME / 2) - return -1; - } - - /* Final ']' */ - if (get_next_char() != ']') - return -1; - return baselen; -} - -static int get_base_var(char *name) -{ - int baselen = 0; - - for (;;) { - int c = get_next_char(); - if (config_file_eof) - return -1; - if (c == ']') - return baselen; - if (isspace(c)) - return get_extended_base_var(name, baselen, c); - if (!iskeychar(c) && c != '.') - return -1; - if (baselen > MAXNAME / 2) - return -1; - name[baselen++] = tolower(c); - } -} - -static int perf_parse_file(config_fn_t fn, void *data) -{ - int comment = 0; - int baselen = 0; - static char var[MAXNAME]; - - /* U+FEFF Byte Order Mark in UTF8 */ - static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; - const unsigned char *bomptr = utf8_bom; - - for (;;) { - int c = get_next_char(); - if (bomptr && *bomptr) { - /* We are at the file beginning; skip UTF8-encoded BOM - * if present. Sane editors won't put this in on their - * own, but e.g. Windows Notepad will do it happily. */ - if ((unsigned char) c == *bomptr) { - bomptr++; - continue; - } else { - /* Do not tolerate partial BOM. */ - if (bomptr != utf8_bom) - break; - /* No BOM at file beginning. Cool. */ - bomptr = NULL; - } - } - if (c == '\n') { - if (config_file_eof) - return 0; - comment = 0; - continue; - } - if (comment || isspace(c)) - continue; - if (c == '#' || c == ';') { - comment = 1; - continue; - } - if (c == '[') { - baselen = get_base_var(var); - if (baselen <= 0) - break; - var[baselen++] = '.'; - var[baselen] = 0; - continue; - } - if (!isalpha(c)) - break; - var[baselen] = tolower(c); - if (get_value(fn, data, var, baselen+1) < 0) - break; - } - die("bad config file line %d in %s", config_linenr, config_file_name); -} - -static int parse_unit_factor(const char *end, unsigned long *val) -{ - if (!*end) - return 1; - else if (!strcasecmp(end, "k")) { - *val *= 1024; - return 1; - } - else if (!strcasecmp(end, "m")) { - *val *= 1024 * 1024; - return 1; - } - else if (!strcasecmp(end, "g")) { - *val *= 1024 * 1024 * 1024; - return 1; - } - return 0; -} - -static int perf_parse_long(const char *value, long *ret) -{ - if (value && *value) { - char *end; - long val = strtol(value, &end, 0); - unsigned long factor = 1; - if (!parse_unit_factor(end, &factor)) - return 0; - *ret = val * factor; - return 1; - } - return 0; -} - -int perf_parse_ulong(const char *value, unsigned long *ret) -{ - if (value && *value) { - char *end; - unsigned long val = strtoul(value, &end, 0); - if (!parse_unit_factor(end, &val)) - return 0; - *ret = val; - return 1; - } - return 0; -} - -static void die_bad_config(const char *name) -{ - if (config_file_name) - die("bad config value for '%s' in %s", name, config_file_name); - die("bad config value for '%s'", name); -} - -int perf_config_int(const char *name, const char *value) -{ - long ret = 0; - if (!perf_parse_long(value, &ret)) - die_bad_config(name); - return ret; -} - -unsigned long perf_config_ulong(const char *name, const char *value) -{ - unsigned long ret; - if (!perf_parse_ulong(value, &ret)) - die_bad_config(name); - return ret; -} - -int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) -{ - *is_bool = 1; - if (!value) - return 1; - if (!*value) - return 0; - if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on")) - return 1; - if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off")) - return 0; - *is_bool = 0; - return perf_config_int(name, value); -} - -int perf_config_bool(const char *name, const char *value) -{ - int discard; - return !!perf_config_bool_or_int(name, value, &discard); -} - -int perf_config_string(const char **dest, const char *var, const char *value) -{ - if (!value) - return config_error_nonbool(var); - *dest = strdup(value); - return 0; -} - -static int perf_default_core_config(const char *var, const char *value) -{ - /* Add other config variables here and to Documentation/config.txt. */ - return 0; -} - -int perf_default_config(const char *var, const char *value, void *dummy) -{ - if (!prefixcmp(var, "core.")) - return perf_default_core_config(var, value); - - /* Add other config variables here and to Documentation/config.txt. */ - return 0; -} - -int perf_config_from_file(config_fn_t fn, const char *filename, void *data) -{ - int ret; - FILE *f = fopen(filename, "r"); - - ret = -1; - if (f) { - config_file = f; - config_file_name = filename; - config_linenr = 1; - config_file_eof = 0; - ret = perf_parse_file(fn, data); - fclose(f); - config_file_name = NULL; - } - return ret; -} - -const char *perf_etc_perfconfig(void) -{ - static const char *system_wide; - if (!system_wide) - system_wide = system_path(ETC_PERFCONFIG); - return system_wide; -} - -static int perf_env_bool(const char *k, int def) -{ - const char *v = getenv(k); - return v ? perf_config_bool(k, v) : def; -} - -int perf_config_system(void) -{ - return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0); -} - -int perf_config_global(void) -{ - return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); -} - -int perf_config(config_fn_t fn, void *data) -{ - int ret = 0, found = 0; - char *repo_config = NULL; - const char *home = NULL; - - /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ - if (config_exclusive_filename) - return perf_config_from_file(fn, config_exclusive_filename, data); - if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { - ret += perf_config_from_file(fn, perf_etc_perfconfig(), - data); - found += 1; - } - - home = getenv("HOME"); - if (perf_config_global() && home) { - char *user_config = strdup(mkpath("%s/.perfconfig", home)); - if (!access(user_config, R_OK)) { - ret += perf_config_from_file(fn, user_config, data); - found += 1; - } - free(user_config); - } - - repo_config = perf_pathdup("config"); - if (!access(repo_config, R_OK)) { - ret += perf_config_from_file(fn, repo_config, data); - found += 1; - } - free(repo_config); - if (found == 0) - return -1; - return ret; -} - -/* - * Find all the stuff for perf_config_set() below. - */ - -#define MAX_MATCHES 512 - -static struct { - int baselen; - char* key; - int do_not_match; - regex_t* value_regex; - int multi_replace; - size_t offset[MAX_MATCHES]; - enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; - int seen; -} store; - -static int matches(const char* key, const char* value) -{ - return !strcmp(key, store.key) && - (store.value_regex == NULL || - (store.do_not_match ^ - !regexec(store.value_regex, value, 0, NULL, 0))); -} - -static int store_aux(const char* key, const char* value, void *cb) -{ - const char *ep; - size_t section_len; - - switch (store.state) { - case KEY_SEEN: - if (matches(key, value)) { - if (store.seen == 1 && store.multi_replace == 0) { - warning("%s has multiple values", key); - } else if (store.seen >= MAX_MATCHES) { - error("too many matches for %s", key); - return 1; - } - - store.offset[store.seen] = ftell(config_file); - store.seen++; - } - break; - case SECTION_SEEN: - /* - * What we are looking for is in store.key (both - * section and var), and its section part is baselen - * long. We found key (again, both section and var). - * We would want to know if this key is in the same - * section as what we are looking for. We already - * know we are in the same section as what should - * hold store.key. - */ - ep = strrchr(key, '.'); - section_len = ep - key; - - if ((section_len != store.baselen) || - memcmp(key, store.key, section_len+1)) { - store.state = SECTION_END_SEEN; - break; - } - - /* - * Do not increment matches: this is no match, but we - * just made sure we are in the desired section. - */ - store.offset[store.seen] = ftell(config_file); - /* fallthru */ - case SECTION_END_SEEN: - case START: - if (matches(key, value)) { - store.offset[store.seen] = ftell(config_file); - store.state = KEY_SEEN; - store.seen++; - } else { - if (strrchr(key, '.') - key == store.baselen && - !strncmp(key, store.key, store.baselen)) { - store.state = SECTION_SEEN; - store.offset[store.seen] = ftell(config_file); - } - } - } - return 0; -} - -static int store_write_section(int fd, const char* key) -{ - const char *dot; - int i, success; - struct strbuf sb = STRBUF_INIT; - - dot = memchr(key, '.', store.baselen); - if (dot) { - strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key); - for (i = dot - key + 1; i < store.baselen; i++) { - if (key[i] == '"' || key[i] == '\\') - strbuf_addch(&sb, '\\'); - strbuf_addch(&sb, key[i]); - } - strbuf_addstr(&sb, "\"]\n"); - } else { - strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); - } - - success = write_in_full(fd, sb.buf, sb.len) == sb.len; - strbuf_release(&sb); - - return success; -} - -static int store_write_pair(int fd, const char* key, const char* value) -{ - int i, success; - int length = strlen(key + store.baselen + 1); - const char *quote = ""; - struct strbuf sb = STRBUF_INIT; - - /* - * Check to see if the value needs to be surrounded with a dq pair. - * Note that problematic characters are always backslash-quoted; this - * check is about not losing leading or trailing SP and strings that - * follow beginning-of-comment characters (i.e. ';' and '#') by the - * configuration parser. - */ - if (value[0] == ' ') - quote = "\""; - for (i = 0; value[i]; i++) - if (value[i] == ';' || value[i] == '#') - quote = "\""; - if (i && value[i - 1] == ' ') - quote = "\""; - - strbuf_addf(&sb, "\t%.*s = %s", - length, key + store.baselen + 1, quote); - - for (i = 0; value[i]; i++) - switch (value[i]) { - case '\n': - strbuf_addstr(&sb, "\\n"); - break; - case '\t': - strbuf_addstr(&sb, "\\t"); - break; - case '"': - case '\\': - strbuf_addch(&sb, '\\'); - default: - strbuf_addch(&sb, value[i]); - break; - } - strbuf_addf(&sb, "%s\n", quote); - - success = write_in_full(fd, sb.buf, sb.len) == sb.len; - strbuf_release(&sb); - - return success; -} - -static ssize_t find_beginning_of_line(const char* contents, size_t size, - size_t offset_, int* found_bracket) -{ - size_t equal_offset = size, bracket_offset = size; - ssize_t offset; - -contline: - for (offset = offset_-2; offset > 0 - && contents[offset] != '\n'; offset--) - switch (contents[offset]) { - case '=': equal_offset = offset; break; - case ']': bracket_offset = offset; break; - } - if (offset > 0 && contents[offset-1] == '\\') { - offset_ = offset; - goto contline; - } - if (bracket_offset < equal_offset) { - *found_bracket = 1; - offset = bracket_offset+1; - } else - offset++; - - return offset; -} - -int perf_config_set(const char* key, const char* value) -{ - return perf_config_set_multivar(key, value, NULL, 0); -} - -/* - * If value==NULL, unset in (remove from) config, - * if value_regex!=NULL, disregard key/value pairs where value does not match. - * if multi_replace==0, nothing, or only one matching key/value is replaced, - * else all matching key/values (regardless how many) are removed, - * before the new pair is written. - * - * Returns 0 on success. - * - * This function does this: - * - * - it locks the config file by creating ".perf/config.lock" - * - * - it then parses the config using store_aux() as validator to find - * the position on the key/value pair to replace. If it is to be unset, - * it must be found exactly once. - * - * - the config file is mmap()ed and the part before the match (if any) is - * written to the lock file, then the changed part and the rest. - * - * - the config file is removed and the lock file rename()d to it. - * - */ -int perf_config_set_multivar(const char* key, const char* value, - const char* value_regex, int multi_replace) -{ - int i, dot; - int fd = -1, in_fd; - int ret = 0; - char* config_filename; - const char* last_dot = strrchr(key, '.'); - - if (config_exclusive_filename) - config_filename = strdup(config_exclusive_filename); - else - config_filename = perf_pathdup("config"); - - /* - * Since "key" actually contains the section name and the real - * key name separated by a dot, we have to know where the dot is. - */ - - if (last_dot == NULL) { - error("key does not contain a section: %s", key); - ret = 2; - goto out_free; - } - store.baselen = last_dot - key; - - store.multi_replace = multi_replace; - - /* - * Validate the key and while at it, lower case it for matching. - */ - store.key = malloc(strlen(key) + 1); - dot = 0; - for (i = 0; key[i]; i++) { - unsigned char c = key[i]; - if (c == '.') - dot = 1; - /* Leave the extended basename untouched.. */ - if (!dot || i > store.baselen) { - if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) { - error("invalid key: %s", key); - free(store.key); - ret = 1; - goto out_free; - } - c = tolower(c); - } else if (c == '\n') { - error("invalid key (newline): %s", key); - free(store.key); - ret = 1; - goto out_free; - } - store.key[i] = c; - } - store.key[i] = 0; - - /* - * If .perf/config does not exist yet, write a minimal version. - */ - in_fd = open(config_filename, O_RDONLY); - if ( in_fd < 0 ) { - free(store.key); - - if ( ENOENT != errno ) { - error("opening %s: %s", config_filename, - strerror(errno)); - ret = 3; /* same as "invalid config file" */ - goto out_free; - } - /* if nothing to unset, error out */ - if (value == NULL) { - ret = 5; - goto out_free; - } - - store.key = (char*)key; - if (!store_write_section(fd, key) || - !store_write_pair(fd, key, value)) - goto write_err_out; - } else { - struct stat st; - char* contents; - size_t contents_sz, copy_begin, copy_end; - int i, new_line = 0; - - if (value_regex == NULL) - store.value_regex = NULL; - else { - if (value_regex[0] == '!') { - store.do_not_match = 1; - value_regex++; - } else - store.do_not_match = 0; - - store.value_regex = (regex_t*)malloc(sizeof(regex_t)); - if (regcomp(store.value_regex, value_regex, - REG_EXTENDED)) { - error("invalid pattern: %s", value_regex); - free(store.value_regex); - ret = 6; - goto out_free; - } - } - - store.offset[0] = 0; - store.state = START; - store.seen = 0; - - /* - * After this, store.offset will contain the *end* offset - * of the last match, or remain at 0 if no match was found. - * As a side effect, we make sure to transform only a valid - * existing config file. - */ - if (perf_config_from_file(store_aux, config_filename, NULL)) { - error("invalid config file %s", config_filename); - free(store.key); - if (store.value_regex != NULL) { - regfree(store.value_regex); - free(store.value_regex); - } - ret = 3; - goto out_free; - } - - free(store.key); - if (store.value_regex != NULL) { - regfree(store.value_regex); - free(store.value_regex); - } - - /* if nothing to unset, or too many matches, error out */ - if ((store.seen == 0 && value == NULL) || - (store.seen > 1 && multi_replace == 0)) { - ret = 5; - goto out_free; - } - - fstat(in_fd, &st); - contents_sz = xsize_t(st.st_size); - contents = mmap(NULL, contents_sz, PROT_READ, - MAP_PRIVATE, in_fd, 0); - close(in_fd); - - if (store.seen == 0) - store.seen = 1; - - for (i = 0, copy_begin = 0; i < store.seen; i++) { - if (store.offset[i] == 0) { - store.offset[i] = copy_end = contents_sz; - } else if (store.state != KEY_SEEN) { - copy_end = store.offset[i]; - } else - copy_end = find_beginning_of_line( - contents, contents_sz, - store.offset[i]-2, &new_line); - - if (copy_end > 0 && contents[copy_end-1] != '\n') - new_line = 1; - - /* write the first part of the config */ - if (copy_end > copy_begin) { - if (write_in_full(fd, contents + copy_begin, - copy_end - copy_begin) < - copy_end - copy_begin) - goto write_err_out; - if (new_line && - write_in_full(fd, "\n", 1) != 1) - goto write_err_out; - } - copy_begin = store.offset[i]; - } - - /* write the pair (value == NULL means unset) */ - if (value != NULL) { - if (store.state == START) { - if (!store_write_section(fd, key)) - goto write_err_out; - } - if (!store_write_pair(fd, key, value)) - goto write_err_out; - } - - /* write the rest of the config */ - if (copy_begin < contents_sz) - if (write_in_full(fd, contents + copy_begin, - contents_sz - copy_begin) < - contents_sz - copy_begin) - goto write_err_out; - - munmap(contents, contents_sz); - } - - ret = 0; - -out_free: - free(config_filename); - return ret; - -write_err_out: - goto out_free; - -} - -/* - * Call this to report error for your variable that should not - * get a boolean value (i.e. "[my] var" means "true"). - */ -int config_error_nonbool(const char *var) -{ - return error("Missing value for '%s'", var); -} diff --git a/Documentation/perf_counter/ctype.c b/Documentation/perf_counter/ctype.c deleted file mode 100644 index b90ec00..0000000 --- a/Documentation/perf_counter/ctype.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Sane locale-independent, ASCII ctype. - * - * No surprises, and works with signed and unsigned chars. - */ -#include "cache.h" - -enum { - S = GIT_SPACE, - A = GIT_ALPHA, - D = GIT_DIGIT, - G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ - R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ -}; - -unsigned char sane_ctype[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ - S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ - D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ - /* Nothing in the 128.. range */ -}; diff --git a/Documentation/perf_counter/exec_cmd.c b/Documentation/perf_counter/exec_cmd.c deleted file mode 100644 index d392922..0000000 --- a/Documentation/perf_counter/exec_cmd.c +++ /dev/null @@ -1,165 +0,0 @@ -#include "cache.h" -#include "exec_cmd.h" -#include "quote.h" -#define MAX_ARGS 32 - -extern char **environ; -static const char *argv_exec_path; -static const char *argv0_path; - -const char *system_path(const char *path) -{ -#ifdef RUNTIME_PREFIX - static const char *prefix; -#else - static const char *prefix = PREFIX; -#endif - struct strbuf d = STRBUF_INIT; - - if (is_absolute_path(path)) - return path; - -#ifdef RUNTIME_PREFIX - assert(argv0_path); - assert(is_absolute_path(argv0_path)); - - if (!prefix && - !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) && - !(prefix = strip_path_suffix(argv0_path, BINDIR)) && - !(prefix = strip_path_suffix(argv0_path, "perf"))) { - prefix = PREFIX; - fprintf(stderr, "RUNTIME_PREFIX requested, " - "but prefix computation failed. " - "Using static fallback '%s'.\n", prefix); - } -#endif - - strbuf_addf(&d, "%s/%s", prefix, path); - path = strbuf_detach(&d, NULL); - return path; -} - -const char *perf_extract_argv0_path(const char *argv0) -{ - const char *slash; - - if (!argv0 || !*argv0) - return NULL; - slash = argv0 + strlen(argv0); - - while (argv0 <= slash && !is_dir_sep(*slash)) - slash--; - - if (slash >= argv0) { - argv0_path = strndup(argv0, slash - argv0); - return slash + 1; - } - - return argv0; -} - -void perf_set_argv_exec_path(const char *exec_path) -{ - argv_exec_path = exec_path; - /* - * Propagate this setting to external programs. - */ - setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1); -} - - -/* Returns the highest-priority, location to look for perf programs. */ -const char *perf_exec_path(void) -{ - const char *env; - - if (argv_exec_path) - return argv_exec_path; - - env = getenv(EXEC_PATH_ENVIRONMENT); - if (env && *env) { - return env; - } - - return system_path(PERF_EXEC_PATH); -} - -static void add_path(struct strbuf *out, const char *path) -{ - if (path && *path) { - if (is_absolute_path(path)) - strbuf_addstr(out, path); - else - strbuf_addstr(out, make_nonrelative_path(path)); - - strbuf_addch(out, PATH_SEP); - } -} - -void setup_path(void) -{ - const char *old_path = getenv("PATH"); - struct strbuf new_path = STRBUF_INIT; - - add_path(&new_path, perf_exec_path()); - add_path(&new_path, argv0_path); - - if (old_path) - strbuf_addstr(&new_path, old_path); - else - strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); - - setenv("PATH", new_path.buf, 1); - - strbuf_release(&new_path); -} - -const char **prepare_perf_cmd(const char **argv) -{ - int argc; - const char **nargv; - - for (argc = 0; argv[argc]; argc++) - ; /* just counting */ - nargv = malloc(sizeof(*nargv) * (argc + 2)); - - nargv[0] = "perf"; - for (argc = 0; argv[argc]; argc++) - nargv[argc + 1] = argv[argc]; - nargv[argc + 1] = NULL; - return nargv; -} - -int execv_perf_cmd(const char **argv) { - const char **nargv = prepare_perf_cmd(argv); - - /* execvp() can only ever return if it fails */ - execvp("perf", (char **)nargv); - - free(nargv); - return -1; -} - - -int execl_perf_cmd(const char *cmd,...) -{ - int argc; - const char *argv[MAX_ARGS + 1]; - const char *arg; - va_list param; - - va_start(param, cmd); - argv[0] = cmd; - argc = 1; - while (argc < MAX_ARGS) { - arg = argv[argc++] = va_arg(param, char *); - if (!arg) - break; - } - va_end(param); - if (MAX_ARGS <= argc) - return error("too many args to run %s", cmd); - - argv[argc] = NULL; - return execv_perf_cmd(argv); -} diff --git a/Documentation/perf_counter/exec_cmd.h b/Documentation/perf_counter/exec_cmd.h deleted file mode 100644 index effe25e..0000000 --- a/Documentation/perf_counter/exec_cmd.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef PERF_EXEC_CMD_H -#define PERF_EXEC_CMD_H - -extern void perf_set_argv_exec_path(const char *exec_path); -extern const char *perf_extract_argv0_path(const char *path); -extern const char *perf_exec_path(void); -extern void setup_path(void); -extern const char **prepare_perf_cmd(const char **argv); -extern int execv_perf_cmd(const char **argv); /* NULL terminated */ -extern int execl_perf_cmd(const char *cmd, ...); -extern const char *system_path(const char *path); - -#endif /* PERF_EXEC_CMD_H */ diff --git a/Documentation/perf_counter/generate-cmdlist.sh b/Documentation/perf_counter/generate-cmdlist.sh deleted file mode 100755 index f06f6fd..0000000 --- a/Documentation/perf_counter/generate-cmdlist.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -echo "/* Automatically generated by $0 */ -struct cmdname_help -{ - char name[16]; - char help[80]; -}; - -static struct cmdname_help common_cmds[] = {" - -sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt | -sort | -while read cmd -do - sed -n ' - /^NAME/,/perf-'"$cmd"'/H - ${ - x - s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ - p - }' "Documentation/perf-$cmd.txt" -done -echo "};" diff --git a/Documentation/perf_counter/help.c b/Documentation/perf_counter/help.c deleted file mode 100644 index ec01167..0000000 --- a/Documentation/perf_counter/help.c +++ /dev/null @@ -1,366 +0,0 @@ -#include "cache.h" -#include "builtin.h" -#include "exec_cmd.h" -#include "levenshtein.h" -#include "help.h" - -/* most GUI terminals set COLUMNS (although some don't export it) */ -static int term_columns(void) -{ - char *col_string = getenv("COLUMNS"); - int n_cols; - - if (col_string && (n_cols = atoi(col_string)) > 0) - return n_cols; - -#ifdef TIOCGWINSZ - { - struct winsize ws; - if (!ioctl(1, TIOCGWINSZ, &ws)) { - if (ws.ws_col) - return ws.ws_col; - } - } -#endif - - return 80; -} - -void add_cmdname(struct cmdnames *cmds, const char *name, int len) -{ - struct cmdname *ent = malloc(sizeof(*ent) + len + 1); - - ent->len = len; - memcpy(ent->name, name, len); - ent->name[len] = 0; - - ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); - cmds->names[cmds->cnt++] = ent; -} - -static void clean_cmdnames(struct cmdnames *cmds) -{ - int i; - for (i = 0; i < cmds->cnt; ++i) - free(cmds->names[i]); - free(cmds->names); - cmds->cnt = 0; - cmds->alloc = 0; -} - -static int cmdname_compare(const void *a_, const void *b_) -{ - struct cmdname *a = *(struct cmdname **)a_; - struct cmdname *b = *(struct cmdname **)b_; - return strcmp(a->name, b->name); -} - -static void uniq(struct cmdnames *cmds) -{ - int i, j; - - if (!cmds->cnt) - return; - - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - - cmds->cnt = j; -} - -void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) -{ - int ci, cj, ei; - int cmp; - - ci = cj = ei = 0; - while (ci < cmds->cnt && ei < excludes->cnt) { - cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); - if (cmp < 0) - cmds->names[cj++] = cmds->names[ci++]; - else if (cmp == 0) - ci++, ei++; - else if (cmp > 0) - ei++; - } - - while (ci < cmds->cnt) - cmds->names[cj++] = cmds->names[ci++]; - - cmds->cnt = cj; -} - -static void pretty_print_string_list(struct cmdnames *cmds, int longest) -{ - int cols = 1, rows; - int space = longest + 1; /* min 1 SP between words */ - int max_cols = term_columns() - 1; /* don't print *on* the edge */ - int i, j; - - if (space < max_cols) - cols = max_cols / space; - rows = (cmds->cnt + cols - 1) / cols; - - for (i = 0; i < rows; i++) { - printf(" "); - - for (j = 0; j < cols; j++) { - int n = j * rows + i; - int size = space; - if (n >= cmds->cnt) - break; - if (j == cols-1 || n + rows >= cmds->cnt) - size = 1; - printf("%-*s", size, cmds->names[n]->name); - } - putchar('\n'); - } -} - -static int is_executable(const char *name) -{ - struct stat st; - - if (stat(name, &st) || /* stat, not lstat */ - !S_ISREG(st.st_mode)) - return 0; - -#ifdef __MINGW32__ - /* cannot trust the executable bit, peek into the file instead */ - char buf[3] = { 0 }; - int n; - int fd = open(name, O_RDONLY); - st.st_mode &= ~S_IXUSR; - if (fd >= 0) { - n = read(fd, buf, 2); - if (n == 2) - /* DOS executables start with "MZ" */ - if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) - st.st_mode |= S_IXUSR; - close(fd); - } -#endif - return st.st_mode & S_IXUSR; -} - -static void list_commands_in_dir(struct cmdnames *cmds, - const char *path, - const char *prefix) -{ - int prefix_len; - DIR *dir = opendir(path); - struct dirent *de; - struct strbuf buf = STRBUF_INIT; - int len; - - if (!dir) - return; - if (!prefix) - prefix = "perf-"; - prefix_len = strlen(prefix); - - strbuf_addf(&buf, "%s/", path); - len = buf.len; - - while ((de = readdir(dir)) != NULL) { - int entlen; - - if (prefixcmp(de->d_name, prefix)) - continue; - - strbuf_setlen(&buf, len); - strbuf_addstr(&buf, de->d_name); - if (!is_executable(buf.buf)) - continue; - - entlen = strlen(de->d_name) - prefix_len; - if (has_extension(de->d_name, ".exe")) - entlen -= 4; - - add_cmdname(cmds, de->d_name + prefix_len, entlen); - } - closedir(dir); - strbuf_release(&buf); -} - -void load_command_list(const char *prefix, - struct cmdnames *main_cmds, - struct cmdnames *other_cmds) -{ - const char *env_path = getenv("PATH"); - const char *exec_path = perf_exec_path(); - - if (exec_path) { - list_commands_in_dir(main_cmds, exec_path, prefix); - qsort(main_cmds->names, main_cmds->cnt, - sizeof(*main_cmds->names), cmdname_compare); - uniq(main_cmds); - } - - if (env_path) { - char *paths, *path, *colon; - path = paths = strdup(env_path); - while (1) { - if ((colon = strchr(path, PATH_SEP))) - *colon = 0; - if (!exec_path || strcmp(path, exec_path)) - list_commands_in_dir(other_cmds, path, prefix); - - if (!colon) - break; - path = colon + 1; - } - free(paths); - - qsort(other_cmds->names, other_cmds->cnt, - sizeof(*other_cmds->names), cmdname_compare); - uniq(other_cmds); - } - exclude_cmds(other_cmds, main_cmds); -} - -void list_commands(const char *title, struct cmdnames *main_cmds, - struct cmdnames *other_cmds) -{ - int i, longest = 0; - - for (i = 0; i < main_cmds->cnt; i++) - if (longest < main_cmds->names[i]->len) - longest = main_cmds->names[i]->len; - for (i = 0; i < other_cmds->cnt; i++) - if (longest < other_cmds->names[i]->len) - longest = other_cmds->names[i]->len; - - if (main_cmds->cnt) { - const char *exec_path = perf_exec_path(); - printf("available %s in '%s'\n", title, exec_path); - printf("----------------"); - mput_char('-', strlen(title) + strlen(exec_path)); - putchar('\n'); - pretty_print_string_list(main_cmds, longest); - putchar('\n'); - } - - if (other_cmds->cnt) { - printf("%s available from elsewhere on your $PATH\n", title); - printf("---------------------------------------"); - mput_char('-', strlen(title)); - putchar('\n'); - pretty_print_string_list(other_cmds, longest); - putchar('\n'); - } -} - -int is_in_cmdlist(struct cmdnames *c, const char *s) -{ - int i; - for (i = 0; i < c->cnt; i++) - if (!strcmp(s, c->names[i]->name)) - return 1; - return 0; -} - -static int autocorrect; -static struct cmdnames aliases; - -static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) -{ - if (!strcmp(var, "help.autocorrect")) - autocorrect = perf_config_int(var,value); - /* Also use aliases for command lookup */ - if (!prefixcmp(var, "alias.")) - add_cmdname(&aliases, var + 6, strlen(var + 6)); - - return perf_default_config(var, value, cb); -} - -static int levenshtein_compare(const void *p1, const void *p2) -{ - const struct cmdname *const *c1 = p1, *const *c2 = p2; - const char *s1 = (*c1)->name, *s2 = (*c2)->name; - int l1 = (*c1)->len; - int l2 = (*c2)->len; - return l1 != l2 ? l1 - l2 : strcmp(s1, s2); -} - -static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) -{ - int i; - ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); - - for (i = 0; i < old->cnt; i++) - cmds->names[cmds->cnt++] = old->names[i]; - free(old->names); - old->cnt = 0; - old->names = NULL; -} - -const char *help_unknown_cmd(const char *cmd) -{ - int i, n, best_similarity = 0; - struct cmdnames main_cmds, other_cmds; - - memset(&main_cmds, 0, sizeof(main_cmds)); - memset(&other_cmds, 0, sizeof(main_cmds)); - memset(&aliases, 0, sizeof(aliases)); - - perf_config(perf_unknown_cmd_config, NULL); - - load_command_list("perf-", &main_cmds, &other_cmds); - - add_cmd_list(&main_cmds, &aliases); - add_cmd_list(&main_cmds, &other_cmds); - qsort(main_cmds.names, main_cmds.cnt, - sizeof(main_cmds.names), cmdname_compare); - uniq(&main_cmds); - - /* This reuses cmdname->len for similarity index */ - for (i = 0; i < main_cmds.cnt; ++i) - main_cmds.names[i]->len = - levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); - - qsort(main_cmds.names, main_cmds.cnt, - sizeof(*main_cmds.names), levenshtein_compare); - - if (!main_cmds.cnt) - die ("Uh oh. Your system reports no Git commands at all."); - - best_similarity = main_cmds.names[0]->len; - n = 1; - while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) - ++n; - if (autocorrect && n == 1) { - const char *assumed = main_cmds.names[0]->name; - main_cmds.names[0] = NULL; - clean_cmdnames(&main_cmds); - fprintf(stderr, "WARNING: You called a Git program named '%s', " - "which does not exist.\n" - "Continuing under the assumption that you meant '%s'\n", - cmd, assumed); - if (autocorrect > 0) { - fprintf(stderr, "in %0.1f seconds automatically...\n", - (float)autocorrect/10.0); - poll(NULL, 0, autocorrect * 100); - } - return assumed; - } - - fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); - - if (best_similarity < 6) { - fprintf(stderr, "\nDid you mean %s?\n", - n < 2 ? "this": "one of these"); - - for (i = 0; i < n; i++) - fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); - } - - exit(1); -} - -int cmd_version(int argc, const char **argv, const char *prefix) -{ - printf("perf version %s\n", perf_version_string); - return 0; -} diff --git a/Documentation/perf_counter/help.h b/Documentation/perf_counter/help.h deleted file mode 100644 index 56bc154..0000000 --- a/Documentation/perf_counter/help.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef HELP_H -#define HELP_H - -struct cmdnames { - int alloc; - int cnt; - struct cmdname { - size_t len; /* also used for similarity index in help.c */ - char name[FLEX_ARRAY]; - } **names; -}; - -static inline void mput_char(char c, unsigned int num) -{ - while(num--) - putchar(c); -} - -void load_command_list(const char *prefix, - struct cmdnames *main_cmds, - struct cmdnames *other_cmds); -void add_cmdname(struct cmdnames *cmds, const char *name, int len); -/* Here we require that excludes is a sorted list. */ -void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); -int is_in_cmdlist(struct cmdnames *c, const char *s); -void list_commands(const char *title, struct cmdnames *main_cmds, - struct cmdnames *other_cmds); - -#endif /* HELP_H */ diff --git a/Documentation/perf_counter/levenshtein.c b/Documentation/perf_counter/levenshtein.c deleted file mode 100644 index e521d15..0000000 --- a/Documentation/perf_counter/levenshtein.c +++ /dev/null @@ -1,84 +0,0 @@ -#include "cache.h" -#include "levenshtein.h" - -/* - * This function implements the Damerau-Levenshtein algorithm to - * calculate a distance between strings. - * - * Basically, it says how many letters need to be swapped, substituted, - * deleted from, or added to string1, at least, to get string2. - * - * The idea is to build a distance matrix for the substrings of both - * strings. To avoid a large space complexity, only the last three rows - * are kept in memory (if swaps had the same or higher cost as one deletion - * plus one insertion, only two rows would be needed). - * - * At any stage, "i + 1" denotes the length of the current substring of - * string1 that the distance is calculated for. - * - * row2 holds the current row, row1 the previous row (i.e. for the substring - * of string1 of length "i"), and row0 the row before that. - * - * In other words, at the start of the big loop, row2[j + 1] contains the - * Damerau-Levenshtein distance between the substring of string1 of length - * "i" and the substring of string2 of length "j + 1". - * - * All the big loop does is determine the partial minimum-cost paths. - * - * It does so by calculating the costs of the path ending in characters - * i (in string1) and j (in string2), respectively, given that the last - * operation is a substition, a swap, a deletion, or an insertion. - * - * This implementation allows the costs to be weighted: - * - * - w (as in "sWap") - * - s (as in "Substitution") - * - a (for insertion, AKA "Add") - * - d (as in "Deletion") - * - * Note that this algorithm calculates a distance _iff_ d == a. - */ -int levenshtein(const char *string1, const char *string2, - int w, int s, int a, int d) -{ - int len1 = strlen(string1), len2 = strlen(string2); - int *row0 = malloc(sizeof(int) * (len2 + 1)); - int *row1 = malloc(sizeof(int) * (len2 + 1)); - int *row2 = malloc(sizeof(int) * (len2 + 1)); - int i, j; - - for (j = 0; j <= len2; j++) - row1[j] = j * a; - for (i = 0; i < len1; i++) { - int *dummy; - - row2[0] = (i + 1) * d; - for (j = 0; j < len2; j++) { - /* substitution */ - row2[j + 1] = row1[j] + s * (string1[i] != string2[j]); - /* swap */ - if (i > 0 && j > 0 && string1[i - 1] == string2[j] && - string1[i] == string2[j - 1] && - row2[j + 1] > row0[j - 1] + w) - row2[j + 1] = row0[j - 1] + w; - /* deletion */ - if (row2[j + 1] > row1[j + 1] + d) - row2[j + 1] = row1[j + 1] + d; - /* insertion */ - if (row2[j + 1] > row2[j] + a) - row2[j + 1] = row2[j] + a; - } - - dummy = row0; - row0 = row1; - row1 = row2; - row2 = dummy; - } - - i = row1[len2]; - free(row0); - free(row1); - free(row2); - - return i; -} diff --git a/Documentation/perf_counter/levenshtein.h b/Documentation/perf_counter/levenshtein.h deleted file mode 100644 index 0173abe..0000000 --- a/Documentation/perf_counter/levenshtein.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef LEVENSHTEIN_H -#define LEVENSHTEIN_H - -int levenshtein(const char *string1, const char *string2, - int swap_penalty, int substition_penalty, - int insertion_penalty, int deletion_penalty); - -#endif diff --git a/Documentation/perf_counter/parse-options.c b/Documentation/perf_counter/parse-options.c deleted file mode 100644 index 7464f34..0000000 --- a/Documentation/perf_counter/parse-options.c +++ /dev/null @@ -1,495 +0,0 @@ -#include "util.h" -#include "parse-options.h" -#include "cache.h" - -#define OPT_SHORT 1 -#define OPT_UNSET 2 - -static int opterror(const struct option *opt, const char *reason, int flags) -{ - if (flags & OPT_SHORT) - return error("switch `%c' %s", opt->short_name, reason); - if (flags & OPT_UNSET) - return error("option `no-%s' %s", opt->long_name, reason); - return error("option `%s' %s", opt->long_name, reason); -} - -static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, - int flags, const char **arg) -{ - if (p->opt) { - *arg = p->opt; - p->opt = NULL; - } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { - *arg = (const char *)opt->defval; - } else if (p->argc > 1) { - p->argc--; - *arg = *++p->argv; - } else - return opterror(opt, "requires a value", flags); - return 0; -} - -static int get_value(struct parse_opt_ctx_t *p, - const struct option *opt, int flags) -{ - const char *s, *arg; - const int unset = flags & OPT_UNSET; - - if (unset && p->opt) - return opterror(opt, "takes no value", flags); - if (unset && (opt->flags & PARSE_OPT_NONEG)) - return opterror(opt, "isn't available", flags); - - if (!(flags & OPT_SHORT) && p->opt) { - switch (opt->type) { - case OPTION_CALLBACK: - if (!(opt->flags & PARSE_OPT_NOARG)) - break; - /* FALLTHROUGH */ - case OPTION_BOOLEAN: - case OPTION_BIT: - case OPTION_SET_INT: - case OPTION_SET_PTR: - return opterror(opt, "takes no value", flags); - default: - break; - } - } - - switch (opt->type) { - case OPTION_BIT: - if (unset) - *(int *)opt->value &= ~opt->defval; - else - *(int *)opt->value |= opt->defval; - return 0; - - case OPTION_BOOLEAN: - *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; - return 0; - - case OPTION_SET_INT: - *(int *)opt->value = unset ? 0 : opt->defval; - return 0; - - case OPTION_SET_PTR: - *(void **)opt->value = unset ? NULL : (void *)opt->defval; - return 0; - - case OPTION_STRING: - if (unset) - *(const char **)opt->value = NULL; - else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) - *(const char **)opt->value = (const char *)opt->defval; - else - return get_arg(p, opt, flags, (const char **)opt->value); - return 0; - - case OPTION_CALLBACK: - if (unset) - return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; - if (opt->flags & PARSE_OPT_NOARG) - return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) - return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; - if (get_arg(p, opt, flags, &arg)) - return -1; - return (*opt->callback)(opt, arg, 0) ? (-1) : 0; - - case OPTION_INTEGER: - if (unset) { - *(int *)opt->value = 0; - return 0; - } - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { - *(int *)opt->value = opt->defval; - return 0; - } - if (get_arg(p, opt, flags, &arg)) - return -1; - *(int *)opt->value = strtol(arg, (char **)&s, 10); - if (*s) - return opterror(opt, "expects a numerical value", flags); - return 0; - - default: - die("should not happen, someone must be hit on the forehead"); - } -} - -static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) -{ - for (; options->type != OPTION_END; options++) { - if (options->short_name == *p->opt) { - p->opt = p->opt[1] ? p->opt + 1 : NULL; - return get_value(p, options, OPT_SHORT); - } - } - return -2; -} - -static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, - const struct option *options) -{ - const char *arg_end = strchr(arg, '='); - const struct option *abbrev_option = NULL, *ambiguous_option = NULL; - int abbrev_flags = 0, ambiguous_flags = 0; - - if (!arg_end) - arg_end = arg + strlen(arg); - - for (; options->type != OPTION_END; options++) { - const char *rest; - int flags = 0; - - if (!options->long_name) - continue; - - rest = skip_prefix(arg, options->long_name); - if (options->type == OPTION_ARGUMENT) { - if (!rest) - continue; - if (*rest == '=') - return opterror(options, "takes no value", flags); - if (*rest) - continue; - p->out[p->cpidx++] = arg - 2; - return 0; - } - if (!rest) { - /* abbreviated? */ - if (!strncmp(options->long_name, arg, arg_end - arg)) { -is_abbreviated: - if (abbrev_option) { - /* - * If this is abbreviated, it is - * ambiguous. So when there is no - * exact match later, we need to - * error out. - */ - ambiguous_option = abbrev_option; - ambiguous_flags = abbrev_flags; - } - if (!(flags & OPT_UNSET) && *arg_end) - p->opt = arg_end + 1; - abbrev_option = options; - abbrev_flags = flags; - continue; - } - /* negated and abbreviated very much? */ - if (!prefixcmp("no-", arg)) { - flags |= OPT_UNSET; - goto is_abbreviated; - } - /* negated? */ - if (strncmp(arg, "no-", 3)) - continue; - flags |= OPT_UNSET; - rest = skip_prefix(arg + 3, options->long_name); - /* abbreviated and negated? */ - if (!rest && !prefixcmp(options->long_name, arg + 3)) - goto is_abbreviated; - if (!rest) - continue; - } - if (*rest) { - if (*rest != '=') - continue; - p->opt = rest + 1; - } - return get_value(p, options, flags); - } - - if (ambiguous_option) - return error("Ambiguous option: %s " - "(could be --%s%s or --%s%s)", - arg, - (ambiguous_flags & OPT_UNSET) ? "no-" : "", - ambiguous_option->long_name, - (abbrev_flags & OPT_UNSET) ? "no-" : "", - abbrev_option->long_name); - if (abbrev_option) - return get_value(p, abbrev_option, abbrev_flags); - return -2; -} - -static void check_typos(const char *arg, const struct option *options) -{ - if (strlen(arg) < 3) - return; - - if (!prefixcmp(arg, "no-")) { - error ("did you mean `--%s` (with two dashes ?)", arg); - exit(129); - } - - for (; options->type != OPTION_END; options++) { - if (!options->long_name) - continue; - if (!prefixcmp(options->long_name, arg)) { - error ("did you mean `--%s` (with two dashes ?)", arg); - exit(129); - } - } -} - -void parse_options_start(struct parse_opt_ctx_t *ctx, - int argc, const char **argv, int flags) -{ - memset(ctx, 0, sizeof(*ctx)); - ctx->argc = argc - 1; - ctx->argv = argv + 1; - ctx->out = argv; - ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); - ctx->flags = flags; - if ((flags & PARSE_OPT_KEEP_UNKNOWN) && - (flags & PARSE_OPT_STOP_AT_NON_OPTION)) - die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); -} - -static int usage_with_options_internal(const char * const *, - const struct option *, int); - -int parse_options_step(struct parse_opt_ctx_t *ctx, - const struct option *options, - const char * const usagestr[]) -{ - int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); - - /* we must reset ->opt, unknown short option leave it dangling */ - ctx->opt = NULL; - - for (; ctx->argc; ctx->argc--, ctx->argv++) { - const char *arg = ctx->argv[0]; - - if (*arg != '-' || !arg[1]) { - if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) - break; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - continue; - } - - if (arg[1] != '-') { - ctx->opt = arg + 1; - if (internal_help && *ctx->opt == 'h') - return parse_options_usage(usagestr, options); - switch (parse_short_opt(ctx, options)) { - case -1: - return parse_options_usage(usagestr, options); - case -2: - goto unknown; - } - if (ctx->opt) - check_typos(arg + 1, options); - while (ctx->opt) { - if (internal_help && *ctx->opt == 'h') - return parse_options_usage(usagestr, options); - switch (parse_short_opt(ctx, options)) { - case -1: - return parse_options_usage(usagestr, options); - case -2: - /* fake a short option thing to hide the fact that we may have - * started to parse aggregated stuff - * - * This is leaky, too bad. - */ - ctx->argv[0] = strdup(ctx->opt - 1); - *(char *)ctx->argv[0] = '-'; - goto unknown; - } - } - continue; - } - - if (!arg[2]) { /* "--" */ - if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { - ctx->argc--; - ctx->argv++; - } - break; - } - - if (internal_help && !strcmp(arg + 2, "help-all")) - return usage_with_options_internal(usagestr, options, 1); - if (internal_help && !strcmp(arg + 2, "help")) - return parse_options_usage(usagestr, options); - switch (parse_long_opt(ctx, arg + 2, options)) { - case -1: - return parse_options_usage(usagestr, options); - case -2: - goto unknown; - } - continue; -unknown: - if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) - return PARSE_OPT_UNKNOWN; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - ctx->opt = NULL; - } - return PARSE_OPT_DONE; -} - -int parse_options_end(struct parse_opt_ctx_t *ctx) -{ - memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); - ctx->out[ctx->cpidx + ctx->argc] = NULL; - return ctx->cpidx + ctx->argc; -} - -int parse_options(int argc, const char **argv, const struct option *options, - const char * const usagestr[], int flags) -{ - struct parse_opt_ctx_t ctx; - - parse_options_start(&ctx, argc, argv, flags); - switch (parse_options_step(&ctx, options, usagestr)) { - case PARSE_OPT_HELP: - exit(129); - case PARSE_OPT_DONE: - break; - default: /* PARSE_OPT_UNKNOWN */ - if (ctx.argv[0][1] == '-') { - error("unknown option `%s'", ctx.argv[0] + 2); - } else { - error("unknown switch `%c'", *ctx.opt); - } - usage_with_options(usagestr, options); - } - - return parse_options_end(&ctx); -} - -#define USAGE_OPTS_WIDTH 24 -#define USAGE_GAP 2 - -int usage_with_options_internal(const char * const *usagestr, - const struct option *opts, int full) -{ - if (!usagestr) - return PARSE_OPT_HELP; - - fprintf(stderr, "usage: %s\n", *usagestr++); - while (*usagestr && **usagestr) - fprintf(stderr, " or: %s\n", *usagestr++); - while (*usagestr) { - fprintf(stderr, "%s%s\n", - **usagestr ? " " : "", - *usagestr); - usagestr++; - } - - if (opts->type != OPTION_GROUP) - fputc('\n', stderr); - - for (; opts->type != OPTION_END; opts++) { - size_t pos; - int pad; - - if (opts->type == OPTION_GROUP) { - fputc('\n', stderr); - if (*opts->help) - fprintf(stderr, "%s\n", opts->help); - continue; - } - if (!full && (opts->flags & PARSE_OPT_HIDDEN)) - continue; - - pos = fprintf(stderr, " "); - if (opts->short_name) - pos += fprintf(stderr, "-%c", opts->short_name); - if (opts->long_name && opts->short_name) - pos += fprintf(stderr, ", "); - if (opts->long_name) - pos += fprintf(stderr, "--%s", opts->long_name); - - switch (opts->type) { - case OPTION_ARGUMENT: - break; - case OPTION_INTEGER: - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=]"); - else - pos += fprintf(stderr, "[]"); - else - pos += fprintf(stderr, " "); - break; - case OPTION_CALLBACK: - if (opts->flags & PARSE_OPT_NOARG) - break; - /* FALLTHROUGH */ - case OPTION_STRING: - if (opts->argh) { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=<%s>]", opts->argh); - else - pos += fprintf(stderr, "[<%s>]", opts->argh); - else - pos += fprintf(stderr, " <%s>", opts->argh); - } else { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=...]"); - else - pos += fprintf(stderr, "[...]"); - else - pos += fprintf(stderr, " ..."); - } - break; - default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ - break; - } - - if (pos <= USAGE_OPTS_WIDTH) - pad = USAGE_OPTS_WIDTH - pos; - else { - fputc('\n', stderr); - pad = USAGE_OPTS_WIDTH; - } - fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); - } - fputc('\n', stderr); - - return PARSE_OPT_HELP; -} - -void usage_with_options(const char * const *usagestr, - const struct option *opts) -{ - usage_with_options_internal(usagestr, opts, 0); - exit(129); -} - -int parse_options_usage(const char * const *usagestr, - const struct option *opts) -{ - return usage_with_options_internal(usagestr, opts, 0); -} - - -/*----- some often used options -----*/ -#include "cache.h" - -int parse_opt_verbosity_cb(const struct option *opt, const char *arg, - int unset) -{ - int *target = opt->value; - - if (unset) - /* --no-quiet, --no-verbose */ - *target = 0; - else if (opt->short_name == 'v') { - if (*target >= 0) - (*target)++; - else - *target = 1; - } else { - if (*target <= 0) - (*target)--; - else - *target = -1; - } - return 0; -} diff --git a/Documentation/perf_counter/parse-options.h b/Documentation/perf_counter/parse-options.h deleted file mode 100644 index a81c7fa..0000000 --- a/Documentation/perf_counter/parse-options.h +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef PARSE_OPTIONS_H -#define PARSE_OPTIONS_H - -enum parse_opt_type { - /* special types */ - OPTION_END, - OPTION_ARGUMENT, - OPTION_GROUP, - /* options with no arguments */ - OPTION_BIT, - OPTION_BOOLEAN, /* _INCR would have been a better name */ - OPTION_SET_INT, - OPTION_SET_PTR, - /* options with arguments (usually) */ - OPTION_STRING, - OPTION_INTEGER, - OPTION_CALLBACK, -}; - -enum parse_opt_flags { - PARSE_OPT_KEEP_DASHDASH = 1, - PARSE_OPT_STOP_AT_NON_OPTION = 2, - PARSE_OPT_KEEP_ARGV0 = 4, - PARSE_OPT_KEEP_UNKNOWN = 8, - PARSE_OPT_NO_INTERNAL_HELP = 16, -}; - -enum parse_opt_option_flags { - PARSE_OPT_OPTARG = 1, - PARSE_OPT_NOARG = 2, - PARSE_OPT_NONEG = 4, - PARSE_OPT_HIDDEN = 8, - PARSE_OPT_LASTARG_DEFAULT = 16, -}; - -struct option; -typedef int parse_opt_cb(const struct option *, const char *arg, int unset); - -/* - * `type`:: - * holds the type of the option, you must have an OPTION_END last in your - * array. - * - * `short_name`:: - * the character to use as a short option name, '\0' if none. - * - * `long_name`:: - * the long option name, without the leading dashes, NULL if none. - * - * `value`:: - * stores pointers to the values to be filled. - * - * `argh`:: - * token to explain the kind of argument this option wants. Keep it - * homogenous across the repository. - * - * `help`:: - * the short help associated to what the option does. - * Must never be NULL (except for OPTION_END). - * OPTION_GROUP uses this pointer to store the group header. - * - * `flags`:: - * mask of parse_opt_option_flags. - * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) - * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs - * PARSE_OPT_NONEG: says that this option cannot be negated - * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in - * the long one. - * - * `callback`:: - * pointer to the callback to use for OPTION_CALLBACK. - * - * `defval`:: - * default value to fill (*->value) with for PARSE_OPT_OPTARG. - * OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in - * the value when met. - * CALLBACKS can use it like they want. - */ -struct option { - enum parse_opt_type type; - int short_name; - const char *long_name; - void *value; - const char *argh; - const char *help; - - int flags; - parse_opt_cb *callback; - intptr_t defval; -}; - -#define OPT_END() { OPTION_END } -#define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } -#define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } -#define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } -#define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } -#define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } -#define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } -#define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } -#define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } -#define OPT_DATE(s, l, v, h) \ - { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ - parse_opt_approxidate_cb } -#define OPT_CALLBACK(s, l, v, a, h, f) \ - { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } - -/* parse_options() will filter out the processed options and leave the - * non-option argments in argv[]. - * Returns the number of arguments left in argv[]. - */ -extern int parse_options(int argc, const char **argv, - const struct option *options, - const char * const usagestr[], int flags); - -extern NORETURN void usage_with_options(const char * const *usagestr, - const struct option *options); - -/*----- incremantal advanced APIs -----*/ - -enum { - PARSE_OPT_HELP = -1, - PARSE_OPT_DONE, - PARSE_OPT_UNKNOWN, -}; - -/* - * It's okay for the caller to consume argv/argc in the usual way. - * Other fields of that structure are private to parse-options and should not - * be modified in any way. - */ -struct parse_opt_ctx_t { - const char **argv; - const char **out; - int argc, cpidx; - const char *opt; - int flags; -}; - -extern int parse_options_usage(const char * const *usagestr, - const struct option *opts); - -extern void parse_options_start(struct parse_opt_ctx_t *ctx, - int argc, const char **argv, int flags); - -extern int parse_options_step(struct parse_opt_ctx_t *ctx, - const struct option *options, - const char * const usagestr[]); - -extern int parse_options_end(struct parse_opt_ctx_t *ctx); - - -/*----- some often used options -----*/ -extern int parse_opt_abbrev_cb(const struct option *, const char *, int); -extern int parse_opt_approxidate_cb(const struct option *, const char *, int); -extern int parse_opt_verbosity_cb(const struct option *, const char *, int); - -#define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") -#define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") -#define OPT__VERBOSITY(var) \ - { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ - PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ - { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ - PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } -#define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") -#define OPT__ABBREV(var) \ - { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ - "use digits to display SHA-1s", \ - PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } - -extern const char *parse_options_fix_filename(const char *prefix, const char *file); - -#endif diff --git a/Documentation/perf_counter/path.c b/Documentation/perf_counter/path.c deleted file mode 100644 index a501a40..0000000 --- a/Documentation/perf_counter/path.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * I'm tired of doing "vsnprintf()" etc just to open a - * file, so here's a "return static buffer with printf" - * interface for paths. - * - * It's obviously not thread-safe. Sue me. But it's quite - * useful for doing things like - * - * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); - * - * which is what it's designed for. - */ -#include "cache.h" - -static char bad_path[] = "/bad-path/"; -/* - * Two hacks: - */ - -static char *get_perf_dir(void) -{ - return "."; -} - -size_t strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} - - -static char *get_pathname(void) -{ - static char pathname_array[4][PATH_MAX]; - static int index; - return pathname_array[3 & ++index]; -} - -static char *cleanup_path(char *path) -{ - /* Clean it up */ - if (!memcmp(path, "./", 2)) { - path += 2; - while (*path == '/') - path++; - } - return path; -} - -char *mksnpath(char *buf, size_t n, const char *fmt, ...) -{ - va_list args; - unsigned len; - - va_start(args, fmt); - len = vsnprintf(buf, n, fmt, args); - va_end(args); - if (len >= n) { - strlcpy(buf, bad_path, n); - return buf; - } - return cleanup_path(buf); -} - -static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) -{ - const char *perf_dir = get_perf_dir(); - size_t len; - - len = strlen(perf_dir); - if (n < len + 1) - goto bad; - memcpy(buf, perf_dir, len); - if (len && !is_dir_sep(perf_dir[len-1])) - buf[len++] = '/'; - len += vsnprintf(buf + len, n - len, fmt, args); - if (len >= n) - goto bad; - return cleanup_path(buf); -bad: - strlcpy(buf, bad_path, n); - return buf; -} - -char *perf_snpath(char *buf, size_t n, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - (void)perf_vsnpath(buf, n, fmt, args); - va_end(args); - return buf; -} - -char *perf_pathdup(const char *fmt, ...) -{ - char path[PATH_MAX]; - va_list args; - va_start(args, fmt); - (void)perf_vsnpath(path, sizeof(path), fmt, args); - va_end(args); - return xstrdup(path); -} - -char *mkpath(const char *fmt, ...) -{ - va_list args; - unsigned len; - char *pathname = get_pathname(); - - va_start(args, fmt); - len = vsnprintf(pathname, PATH_MAX, fmt, args); - va_end(args); - if (len >= PATH_MAX) - return bad_path; - return cleanup_path(pathname); -} - -char *perf_path(const char *fmt, ...) -{ - const char *perf_dir = get_perf_dir(); - char *pathname = get_pathname(); - va_list args; - unsigned len; - - len = strlen(perf_dir); - if (len > PATH_MAX-100) - return bad_path; - memcpy(pathname, perf_dir, len); - if (len && perf_dir[len-1] != '/') - pathname[len++] = '/'; - va_start(args, fmt); - len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); - va_end(args); - if (len >= PATH_MAX) - return bad_path; - return cleanup_path(pathname); -} - - -/* perf_mkstemp() - create tmp file honoring TMPDIR variable */ -int perf_mkstemp(char *path, size_t len, const char *template) -{ - const char *tmp; - size_t n; - - tmp = getenv("TMPDIR"); - if (!tmp) - tmp = "/tmp"; - n = snprintf(path, len, "%s/%s", tmp, template); - if (len <= n) { - errno = ENAMETOOLONG; - return -1; - } - return mkstemp(path); -} - - -const char *make_relative_path(const char *abs, const char *base) -{ - static char buf[PATH_MAX + 1]; - int baselen; - if (!base) - return abs; - baselen = strlen(base); - if (prefixcmp(abs, base)) - return abs; - if (abs[baselen] == '/') - baselen++; - else if (base[baselen - 1] != '/') - return abs; - strcpy(buf, abs + baselen); - return buf; -} - -/* - * It is okay if dst == src, but they should not overlap otherwise. - * - * Performs the following normalizations on src, storing the result in dst: - * - Ensures that components are separated by '/' (Windows only) - * - Squashes sequences of '/'. - * - Removes "." components. - * - Removes ".." components, and the components the precede them. - * Returns failure (non-zero) if a ".." component appears as first path - * component anytime during the normalization. Otherwise, returns success (0). - * - * Note that this function is purely textual. It does not follow symlinks, - * verify the existence of the path, or make any system calls. - */ -int normalize_path_copy(char *dst, const char *src) -{ - char *dst0; - - if (has_dos_drive_prefix(src)) { - *dst++ = *src++; - *dst++ = *src++; - } - dst0 = dst; - - if (is_dir_sep(*src)) { - *dst++ = '/'; - while (is_dir_sep(*src)) - src++; - } - - for (;;) { - char c = *src; - - /* - * A path component that begins with . could be - * special: - * (1) "." and ends -- ignore and terminate. - * (2) "./" -- ignore them, eat slash and continue. - * (3) ".." and ends -- strip one and terminate. - * (4) "../" -- strip one, eat slash and continue. - */ - if (c == '.') { - if (!src[1]) { - /* (1) */ - src++; - } else if (is_dir_sep(src[1])) { - /* (2) */ - src += 2; - while (is_dir_sep(*src)) - src++; - continue; - } else if (src[1] == '.') { - if (!src[2]) { - /* (3) */ - src += 2; - goto up_one; - } else if (is_dir_sep(src[2])) { - /* (4) */ - src += 3; - while (is_dir_sep(*src)) - src++; - goto up_one; - } - } - } - - /* copy up to the next '/', and eat all '/' */ - while ((c = *src++) != '\0' && !is_dir_sep(c)) - *dst++ = c; - if (is_dir_sep(c)) { - *dst++ = '/'; - while (is_dir_sep(c)) - c = *src++; - src--; - } else if (!c) - break; - continue; - - up_one: - /* - * dst0..dst is prefix portion, and dst[-1] is '/'; - * go up one level. - */ - dst--; /* go to trailing '/' */ - if (dst <= dst0) - return -1; - /* Windows: dst[-1] cannot be backslash anymore */ - while (dst0 < dst && dst[-1] != '/') - dst--; - } - *dst = '\0'; - return 0; -} - -/* - * path = Canonical absolute path - * prefix_list = Colon-separated list of absolute paths - * - * Determines, for each path in prefix_list, whether the "prefix" really - * is an ancestor directory of path. Returns the length of the longest - * ancestor directory, excluding any trailing slashes, or -1 if no prefix - * is an ancestor. (Note that this means 0 is returned if prefix_list is - * "/".) "/foo" is not considered an ancestor of "/foobar". Directories - * are not considered to be their own ancestors. path must be in a - * canonical form: empty components, or "." or ".." components are not - * allowed. prefix_list may be null, which is like "". - */ -int longest_ancestor_length(const char *path, const char *prefix_list) -{ - char buf[PATH_MAX+1]; - const char *ceil, *colon; - int len, max_len = -1; - - if (prefix_list == NULL || !strcmp(path, "/")) - return -1; - - for (colon = ceil = prefix_list; *colon; ceil = colon+1) { - for (colon = ceil; *colon && *colon != PATH_SEP; colon++); - len = colon - ceil; - if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil)) - continue; - strlcpy(buf, ceil, len+1); - if (normalize_path_copy(buf, buf) < 0) - continue; - len = strlen(buf); - if (len > 0 && buf[len-1] == '/') - buf[--len] = '\0'; - - if (!strncmp(path, buf, len) && - path[len] == '/' && - len > max_len) { - max_len = len; - } - } - - return max_len; -} - -/* strip arbitrary amount of directory separators at end of path */ -static inline int chomp_trailing_dir_sep(const char *path, int len) -{ - while (len && is_dir_sep(path[len - 1])) - len--; - return len; -} - -/* - * If path ends with suffix (complete path components), returns the - * part before suffix (sans trailing directory separators). - * Otherwise returns NULL. - */ -char *strip_path_suffix(const char *path, const char *suffix) -{ - int path_len = strlen(path), suffix_len = strlen(suffix); - - while (suffix_len) { - if (!path_len) - return NULL; - - if (is_dir_sep(path[path_len - 1])) { - if (!is_dir_sep(suffix[suffix_len - 1])) - return NULL; - path_len = chomp_trailing_dir_sep(path, path_len); - suffix_len = chomp_trailing_dir_sep(suffix, suffix_len); - } - else if (path[--path_len] != suffix[--suffix_len]) - return NULL; - } - - if (path_len && !is_dir_sep(path[path_len - 1])) - return NULL; - return xstrndup(path, chomp_trailing_dir_sep(path, path_len)); -} diff --git a/Documentation/perf_counter/perf.c b/Documentation/perf_counter/perf.c index 8d6faec..594d270 100644 --- a/Documentation/perf_counter/perf.c +++ b/Documentation/perf_counter/perf.c @@ -1,8 +1,8 @@ #include "builtin.h" -#include "exec_cmd.h" -#include "cache.h" -#include "quote.h" -#include "run-command.h" +#include "util/exec_cmd.h" +#include "util/cache.h" +#include "util/quote.h" +#include "util/run-command.h" const char perf_usage_string[] = "perf [--version] [--help] COMMAND [ARGS]"; diff --git a/Documentation/perf_counter/quote.c b/Documentation/perf_counter/quote.c deleted file mode 100644 index 7a49fcf..0000000 --- a/Documentation/perf_counter/quote.c +++ /dev/null @@ -1,478 +0,0 @@ -#include "cache.h" -#include "quote.h" - -int quote_path_fully = 1; - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * - * E.g. - * original sq_quote result - * name ==> name ==> 'name' - * a b ==> a b ==> 'a b' - * a'b ==> a'\''b ==> 'a'\''b' - * a!b ==> a'\!'b ==> 'a'\!'b' - */ -static inline int need_bs_quote(char c) -{ - return (c == '\'' || c == '!'); -} - -void sq_quote_buf(struct strbuf *dst, const char *src) -{ - char *to_free = NULL; - - if (dst->buf == src) - to_free = strbuf_detach(dst, NULL); - - strbuf_addch(dst, '\''); - while (*src) { - size_t len = strcspn(src, "'!"); - strbuf_add(dst, src, len); - src += len; - while (need_bs_quote(*src)) { - strbuf_addstr(dst, "'\\"); - strbuf_addch(dst, *src++); - strbuf_addch(dst, '\''); - } - } - strbuf_addch(dst, '\''); - free(to_free); -} - -void sq_quote_print(FILE *stream, const char *src) -{ - char c; - - fputc('\'', stream); - while ((c = *src++)) { - if (need_bs_quote(c)) { - fputs("'\\", stream); - fputc(c, stream); - fputc('\'', stream); - } else { - fputc(c, stream); - } - } - fputc('\'', stream); -} - -void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) -{ - int i; - - /* Copy into destination buffer. */ - strbuf_grow(dst, 255); - for (i = 0; argv[i]; ++i) { - strbuf_addch(dst, ' '); - sq_quote_buf(dst, argv[i]); - if (maxlen && dst->len > maxlen) - die("Too many or long arguments"); - } -} - -char *sq_dequote_step(char *arg, char **next) -{ - char *dst = arg; - char *src = arg; - char c; - - if (*src != '\'') - return NULL; - for (;;) { - c = *++src; - if (!c) - return NULL; - if (c != '\'') { - *dst++ = c; - continue; - } - /* We stepped out of sq */ - switch (*++src) { - case '\0': - *dst = 0; - if (next) - *next = NULL; - return arg; - case '\\': - c = *++src; - if (need_bs_quote(c) && *++src == '\'') { - *dst++ = c; - continue; - } - /* Fallthrough */ - default: - if (!next || !isspace(*src)) - return NULL; - do { - c = *++src; - } while (isspace(c)); - *dst = 0; - *next = src; - return arg; - } - } -} - -char *sq_dequote(char *arg) -{ - return sq_dequote_step(arg, NULL); -} - -int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) -{ - char *next = arg; - - if (!*arg) - return 0; - do { - char *dequoted = sq_dequote_step(next, &next); - if (!dequoted) - return -1; - ALLOC_GROW(*argv, *nr + 1, *alloc); - (*argv)[(*nr)++] = dequoted; - } while (next); - - return 0; -} - -/* 1 means: quote as octal - * 0 means: quote as octal if (quote_path_fully) - * -1 means: never quote - * c: quote as "\\c" - */ -#define X8(x) x, x, x, x, x, x, x, x -#define X16(x) X8(x), X8(x) -static signed char const sq_lookup[256] = { - /* 0 1 2 3 4 5 6 7 */ - /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 'a', - /* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r', 1, 1, - /* 0x10 */ X16(1), - /* 0x20 */ -1, -1, '"', -1, -1, -1, -1, -1, - /* 0x28 */ X16(-1), X16(-1), X16(-1), - /* 0x58 */ -1, -1, -1, -1,'\\', -1, -1, -1, - /* 0x60 */ X16(-1), X8(-1), - /* 0x78 */ -1, -1, -1, -1, -1, -1, -1, 1, - /* 0x80 */ /* set to 0 */ -}; - -static inline int sq_must_quote(char c) -{ - return sq_lookup[(unsigned char)c] + quote_path_fully > 0; -} - -/* returns the longest prefix not needing a quote up to maxlen if positive. - This stops at the first \0 because it's marked as a character needing an - escape */ -static size_t next_quote_pos(const char *s, ssize_t maxlen) -{ - size_t len; - if (maxlen < 0) { - for (len = 0; !sq_must_quote(s[len]); len++); - } else { - for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++); - } - return len; -} - -/* - * C-style name quoting. - * - * (1) if sb and fp are both NULL, inspect the input name and counts the - * number of bytes that are needed to hold c_style quoted version of name, - * counting the double quotes around it but not terminating NUL, and - * returns it. - * However, if name does not need c_style quoting, it returns 0. - * - * (2) if sb or fp are not NULL, it emits the c_style quoted version - * of name, enclosed with double quotes if asked and needed only. - * Return value is the same as in (1). - */ -static size_t quote_c_style_counted(const char *name, ssize_t maxlen, - struct strbuf *sb, FILE *fp, int no_dq) -{ -#undef EMIT -#define EMIT(c) \ - do { \ - if (sb) strbuf_addch(sb, (c)); \ - if (fp) fputc((c), fp); \ - count++; \ - } while (0) -#define EMITBUF(s, l) \ - do { \ - if (sb) strbuf_add(sb, (s), (l)); \ - if (fp) fwrite((s), (l), 1, fp); \ - count += (l); \ - } while (0) - - size_t len, count = 0; - const char *p = name; - - for (;;) { - int ch; - - len = next_quote_pos(p, maxlen); - if (len == maxlen || !p[len]) - break; - - if (!no_dq && p == name) - EMIT('"'); - - EMITBUF(p, len); - EMIT('\\'); - p += len; - ch = (unsigned char)*p++; - if (sq_lookup[ch] >= ' ') { - EMIT(sq_lookup[ch]); - } else { - EMIT(((ch >> 6) & 03) + '0'); - EMIT(((ch >> 3) & 07) + '0'); - EMIT(((ch >> 0) & 07) + '0'); - } - } - - EMITBUF(p, len); - if (p == name) /* no ending quote needed */ - return 0; - - if (!no_dq) - EMIT('"'); - return count; -} - -size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq) -{ - return quote_c_style_counted(name, -1, sb, fp, nodq); -} - -void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq) -{ - if (quote_c_style(prefix, NULL, NULL, 0) || - quote_c_style(path, NULL, NULL, 0)) { - if (!nodq) - strbuf_addch(sb, '"'); - quote_c_style(prefix, sb, NULL, 1); - quote_c_style(path, sb, NULL, 1); - if (!nodq) - strbuf_addch(sb, '"'); - } else { - strbuf_addstr(sb, prefix); - strbuf_addstr(sb, path); - } -} - -void write_name_quoted(const char *name, FILE *fp, int terminator) -{ - if (terminator) { - quote_c_style(name, NULL, fp, 0); - } else { - fputs(name, fp); - } - fputc(terminator, fp); -} - -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, - const char *name, FILE *fp, int terminator) -{ - int needquote = 0; - - if (terminator) { - needquote = next_quote_pos(pfx, pfxlen) < pfxlen - || name[next_quote_pos(name, -1)]; - } - if (needquote) { - fputc('"', fp); - quote_c_style_counted(pfx, pfxlen, NULL, fp, 1); - quote_c_style(name, NULL, fp, 1); - fputc('"', fp); - } else { - fwrite(pfx, pfxlen, 1, fp); - fputs(name, fp); - } - fputc(terminator, fp); -} - -/* quote path as relative to the given prefix */ -char *quote_path_relative(const char *in, int len, - struct strbuf *out, const char *prefix) -{ - int needquote; - - if (len < 0) - len = strlen(in); - - /* "../" prefix itself does not need quoting, but "in" might. */ - needquote = next_quote_pos(in, len) < len; - strbuf_setlen(out, 0); - strbuf_grow(out, len); - - if (needquote) - strbuf_addch(out, '"'); - if (prefix) { - int off = 0; - while (prefix[off] && off < len && prefix[off] == in[off]) - if (prefix[off] == '/') { - prefix += off + 1; - in += off + 1; - len -= off + 1; - off = 0; - } else - off++; - - for (; *prefix; prefix++) - if (*prefix == '/') - strbuf_addstr(out, "../"); - } - - quote_c_style_counted (in, len, out, NULL, 1); - - if (needquote) - strbuf_addch(out, '"'); - if (!out->len) - strbuf_addstr(out, "./"); - - return out->buf; -} - -/* - * C-style name unquoting. - * - * Quoted should point at the opening double quote. - * + Returns 0 if it was able to unquote the string properly, and appends the - * result in the strbuf `sb'. - * + Returns -1 in case of error, and doesn't touch the strbuf. Though note - * that this function will allocate memory in the strbuf, so calling - * strbuf_release is mandatory whichever result unquote_c_style returns. - * - * Updates endp pointer to point at one past the ending double quote if given. - */ -int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) -{ - size_t oldlen = sb->len, len; - int ch, ac; - - if (*quoted++ != '"') - return -1; - - for (;;) { - len = strcspn(quoted, "\"\\"); - strbuf_add(sb, quoted, len); - quoted += len; - - switch (*quoted++) { - case '"': - if (endp) - *endp = quoted; - return 0; - case '\\': - break; - default: - goto error; - } - - switch ((ch = *quoted++)) { - case 'a': ch = '\a'; break; - case 'b': ch = '\b'; break; - case 'f': ch = '\f'; break; - case 'n': ch = '\n'; break; - case 'r': ch = '\r'; break; - case 't': ch = '\t'; break; - case 'v': ch = '\v'; break; - - case '\\': case '"': - break; /* verbatim */ - - /* octal values with first digit over 4 overflow */ - case '0': case '1': case '2': case '3': - ac = ((ch - '0') << 6); - if ((ch = *quoted++) < '0' || '7' < ch) - goto error; - ac |= ((ch - '0') << 3); - if ((ch = *quoted++) < '0' || '7' < ch) - goto error; - ac |= (ch - '0'); - ch = ac; - break; - default: - goto error; - } - strbuf_addch(sb, ch); - } - - error: - strbuf_setlen(sb, oldlen); - return -1; -} - -/* quoting as a string literal for other languages */ - -void perl_quote_print(FILE *stream, const char *src) -{ - const char sq = '\''; - const char bq = '\\'; - char c; - - fputc(sq, stream); - while ((c = *src++)) { - if (c == sq || c == bq) - fputc(bq, stream); - fputc(c, stream); - } - fputc(sq, stream); -} - -void python_quote_print(FILE *stream, const char *src) -{ - const char sq = '\''; - const char bq = '\\'; - const char nl = '\n'; - char c; - - fputc(sq, stream); - while ((c = *src++)) { - if (c == nl) { - fputc(bq, stream); - fputc('n', stream); - continue; - } - if (c == sq || c == bq) - fputc(bq, stream); - fputc(c, stream); - } - fputc(sq, stream); -} - -void tcl_quote_print(FILE *stream, const char *src) -{ - char c; - - fputc('"', stream); - while ((c = *src++)) { - switch (c) { - case '[': case ']': - case '{': case '}': - case '$': case '\\': case '"': - fputc('\\', stream); - default: - fputc(c, stream); - break; - case '\f': - fputs("\\f", stream); - break; - case '\r': - fputs("\\r", stream); - break; - case '\n': - fputs("\\n", stream); - break; - case '\t': - fputs("\\t", stream); - break; - case '\v': - fputs("\\v", stream); - break; - } - } - fputc('"', stream); -} diff --git a/Documentation/perf_counter/quote.h b/Documentation/perf_counter/quote.h deleted file mode 100644 index 66730f2..0000000 --- a/Documentation/perf_counter/quote.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef QUOTE_H -#define QUOTE_H - -#include -#include - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * single quote pair. - * - * For example, if you are passing the result to system() as an - * argument: - * - * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) - * - * would be appropriate. If the system() is going to call ssh to - * run the command on the other side: - * - * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); - * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd)); - * - * Note that the above examples leak memory! Remember to free result from - * sq_quote() in a real application. - * - * sq_quote_buf() writes to an existing buffer of specified size; it - * will return the number of characters that would have been written - * excluding the final null regardless of the buffer size. - */ - -extern void sq_quote_print(FILE *stream, const char *src); - -extern void sq_quote_buf(struct strbuf *, const char *src); -extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); - -/* This unwraps what sq_quote() produces in place, but returns - * NULL if the input does not look like what sq_quote would have - * produced. - */ -extern char *sq_dequote(char *); - -/* - * Same as the above, but can be used to unwrap many arguments in the - * same string separated by space. "next" is changed to point to the - * next argument that should be passed as first parameter. When there - * is no more argument to be dequoted, "next" is updated to point to NULL. - */ -extern char *sq_dequote_step(char *arg, char **next); -extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); - -extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); -extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq); -extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); - -extern void write_name_quoted(const char *name, FILE *, int terminator); -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, - const char *name, FILE *, int terminator); - -/* quote path as relative to the given prefix */ -char *quote_path_relative(const char *in, int len, - struct strbuf *out, const char *prefix); - -/* quoting as a string literal for other languages */ -extern void perl_quote_print(FILE *stream, const char *src); -extern void python_quote_print(FILE *stream, const char *src); -extern void tcl_quote_print(FILE *stream, const char *src); - -#endif diff --git a/Documentation/perf_counter/run-command.c b/Documentation/perf_counter/run-command.c deleted file mode 100644 index b2f5e85..0000000 --- a/Documentation/perf_counter/run-command.c +++ /dev/null @@ -1,395 +0,0 @@ -#include "cache.h" -#include "run-command.h" -#include "exec_cmd.h" - -static inline void close_pair(int fd[2]) -{ - close(fd[0]); - close(fd[1]); -} - -static inline void dup_devnull(int to) -{ - int fd = open("/dev/null", O_RDWR); - dup2(fd, to); - close(fd); -} - -int start_command(struct child_process *cmd) -{ - int need_in, need_out, need_err; - int fdin[2], fdout[2], fderr[2]; - - /* - * In case of errors we must keep the promise to close FDs - * that have been passed in via ->in and ->out. - */ - - need_in = !cmd->no_stdin && cmd->in < 0; - if (need_in) { - if (pipe(fdin) < 0) { - if (cmd->out > 0) - close(cmd->out); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->in = fdin[1]; - } - - need_out = !cmd->no_stdout - && !cmd->stdout_to_stderr - && cmd->out < 0; - if (need_out) { - if (pipe(fdout) < 0) { - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->out = fdout[0]; - } - - need_err = !cmd->no_stderr && cmd->err < 0; - if (need_err) { - if (pipe(fderr) < 0) { - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - if (need_out) - close_pair(fdout); - else if (cmd->out) - close(cmd->out); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->err = fderr[0]; - } - -#ifndef __MINGW32__ - fflush(NULL); - cmd->pid = fork(); - if (!cmd->pid) { - if (cmd->no_stdin) - dup_devnull(0); - else if (need_in) { - dup2(fdin[0], 0); - close_pair(fdin); - } else if (cmd->in) { - dup2(cmd->in, 0); - close(cmd->in); - } - - if (cmd->no_stderr) - dup_devnull(2); - else if (need_err) { - dup2(fderr[1], 2); - close_pair(fderr); - } - - if (cmd->no_stdout) - dup_devnull(1); - else if (cmd->stdout_to_stderr) - dup2(2, 1); - else if (need_out) { - dup2(fdout[1], 1); - close_pair(fdout); - } else if (cmd->out > 1) { - dup2(cmd->out, 1); - close(cmd->out); - } - - if (cmd->dir && chdir(cmd->dir)) - die("exec %s: cd to %s failed (%s)", cmd->argv[0], - cmd->dir, strerror(errno)); - if (cmd->env) { - for (; *cmd->env; cmd->env++) { - if (strchr(*cmd->env, '=')) - putenv((char*)*cmd->env); - else - unsetenv(*cmd->env); - } - } - if (cmd->preexec_cb) - cmd->preexec_cb(); - if (cmd->perf_cmd) { - execv_perf_cmd(cmd->argv); - } else { - execvp(cmd->argv[0], (char *const*) cmd->argv); - } - exit(127); - } -#else - int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ - const char **sargv = cmd->argv; - char **env = environ; - - if (cmd->no_stdin) { - s0 = dup(0); - dup_devnull(0); - } else if (need_in) { - s0 = dup(0); - dup2(fdin[0], 0); - } else if (cmd->in) { - s0 = dup(0); - dup2(cmd->in, 0); - } - - if (cmd->no_stderr) { - s2 = dup(2); - dup_devnull(2); - } else if (need_err) { - s2 = dup(2); - dup2(fderr[1], 2); - } - - if (cmd->no_stdout) { - s1 = dup(1); - dup_devnull(1); - } else if (cmd->stdout_to_stderr) { - s1 = dup(1); - dup2(2, 1); - } else if (need_out) { - s1 = dup(1); - dup2(fdout[1], 1); - } else if (cmd->out > 1) { - s1 = dup(1); - dup2(cmd->out, 1); - } - - if (cmd->dir) - die("chdir in start_command() not implemented"); - if (cmd->env) { - env = copy_environ(); - for (; *cmd->env; cmd->env++) - env = env_setenv(env, *cmd->env); - } - - if (cmd->perf_cmd) { - cmd->argv = prepare_perf_cmd(cmd->argv); - } - - cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); - - if (cmd->env) - free_environ(env); - if (cmd->perf_cmd) - free(cmd->argv); - - cmd->argv = sargv; - if (s0 >= 0) - dup2(s0, 0), close(s0); - if (s1 >= 0) - dup2(s1, 1), close(s1); - if (s2 >= 0) - dup2(s2, 2), close(s2); -#endif - - if (cmd->pid < 0) { - int err = errno; - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - if (need_out) - close_pair(fdout); - else if (cmd->out) - close(cmd->out); - if (need_err) - close_pair(fderr); - return err == ENOENT ? - -ERR_RUN_COMMAND_EXEC : - -ERR_RUN_COMMAND_FORK; - } - - if (need_in) - close(fdin[0]); - else if (cmd->in) - close(cmd->in); - - if (need_out) - close(fdout[1]); - else if (cmd->out) - close(cmd->out); - - if (need_err) - close(fderr[1]); - - return 0; -} - -static int wait_or_whine(pid_t pid) -{ - for (;;) { - int status, code; - pid_t waiting = waitpid(pid, &status, 0); - - if (waiting < 0) { - if (errno == EINTR) - continue; - error("waitpid failed (%s)", strerror(errno)); - return -ERR_RUN_COMMAND_WAITPID; - } - if (waiting != pid) - return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; - if (WIFSIGNALED(status)) - return -ERR_RUN_COMMAND_WAITPID_SIGNAL; - - if (!WIFEXITED(status)) - return -ERR_RUN_COMMAND_WAITPID_NOEXIT; - code = WEXITSTATUS(status); - switch (code) { - case 127: - return -ERR_RUN_COMMAND_EXEC; - case 0: - return 0; - default: - return -code; - } - } -} - -int finish_command(struct child_process *cmd) -{ - return wait_or_whine(cmd->pid); -} - -int run_command(struct child_process *cmd) -{ - int code = start_command(cmd); - if (code) - return code; - return finish_command(cmd); -} - -static void prepare_run_command_v_opt(struct child_process *cmd, - const char **argv, - int opt) -{ - memset(cmd, 0, sizeof(*cmd)); - cmd->argv = argv; - cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; - cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; - cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; -} - -int run_command_v_opt(const char **argv, int opt) -{ - struct child_process cmd; - prepare_run_command_v_opt(&cmd, argv, opt); - return run_command(&cmd); -} - -int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env) -{ - struct child_process cmd; - prepare_run_command_v_opt(&cmd, argv, opt); - cmd.dir = dir; - cmd.env = env; - return run_command(&cmd); -} - -#ifdef __MINGW32__ -static __stdcall unsigned run_thread(void *data) -{ - struct async *async = data; - return async->proc(async->fd_for_proc, async->data); -} -#endif - -int start_async(struct async *async) -{ - int pipe_out[2]; - - if (pipe(pipe_out) < 0) - return error("cannot create pipe: %s", strerror(errno)); - async->out = pipe_out[0]; - -#ifndef __MINGW32__ - /* Flush stdio before fork() to avoid cloning buffers */ - fflush(NULL); - - async->pid = fork(); - if (async->pid < 0) { - error("fork (async) failed: %s", strerror(errno)); - close_pair(pipe_out); - return -1; - } - if (!async->pid) { - close(pipe_out[0]); - exit(!!async->proc(pipe_out[1], async->data)); - } - close(pipe_out[1]); -#else - async->fd_for_proc = pipe_out[1]; - async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); - if (!async->tid) { - error("cannot create thread: %s", strerror(errno)); - close_pair(pipe_out); - return -1; - } -#endif - return 0; -} - -int finish_async(struct async *async) -{ -#ifndef __MINGW32__ - int ret = 0; - - if (wait_or_whine(async->pid)) - ret = error("waitpid (async) failed"); -#else - DWORD ret = 0; - if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) - ret = error("waiting for thread failed: %lu", GetLastError()); - else if (!GetExitCodeThread(async->tid, &ret)) - ret = error("cannot get thread exit code: %lu", GetLastError()); - CloseHandle(async->tid); -#endif - return ret; -} - -int run_hook(const char *index_file, const char *name, ...) -{ - struct child_process hook; - const char **argv = NULL, *env[2]; - char index[PATH_MAX]; - va_list args; - int ret; - size_t i = 0, alloc = 0; - - if (access(perf_path("hooks/%s", name), X_OK) < 0) - return 0; - - va_start(args, name); - ALLOC_GROW(argv, i + 1, alloc); - argv[i++] = perf_path("hooks/%s", name); - while (argv[i-1]) { - ALLOC_GROW(argv, i + 1, alloc); - argv[i++] = va_arg(args, const char *); - } - va_end(args); - - memset(&hook, 0, sizeof(hook)); - hook.argv = argv; - hook.no_stdin = 1; - hook.stdout_to_stderr = 1; - if (index_file) { - snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); - env[0] = index; - env[1] = NULL; - hook.env = env; - } - - ret = start_command(&hook); - free(argv); - if (ret) { - warning("Could not spawn %s", argv[0]); - return ret; - } - ret = finish_command(&hook); - if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) - warning("%s exited due to uncaught signal", argv[0]); - - return ret; -} diff --git a/Documentation/perf_counter/run-command.h b/Documentation/perf_counter/run-command.h deleted file mode 100644 index 328289f..0000000 --- a/Documentation/perf_counter/run-command.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef RUN_COMMAND_H -#define RUN_COMMAND_H - -enum { - ERR_RUN_COMMAND_FORK = 10000, - ERR_RUN_COMMAND_EXEC, - ERR_RUN_COMMAND_PIPE, - ERR_RUN_COMMAND_WAITPID, - ERR_RUN_COMMAND_WAITPID_WRONG_PID, - ERR_RUN_COMMAND_WAITPID_SIGNAL, - ERR_RUN_COMMAND_WAITPID_NOEXIT, -}; -#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) - -struct child_process { - const char **argv; - pid_t pid; - /* - * Using .in, .out, .err: - * - Specify 0 for no redirections (child inherits stdin, stdout, - * stderr from parent). - * - Specify -1 to have a pipe allocated as follows: - * .in: returns the writable pipe end; parent writes to it, - * the readable pipe end becomes child's stdin - * .out, .err: returns the readable pipe end; parent reads from - * it, the writable pipe end becomes child's stdout/stderr - * The caller of start_command() must close the returned FDs - * after it has completed reading from/writing to it! - * - Specify > 0 to set a channel to a particular FD as follows: - * .in: a readable FD, becomes child's stdin - * .out: a writable FD, becomes child's stdout/stderr - * .err > 0 not supported - * The specified FD is closed by start_command(), even in case - * of errors! - */ - int in; - int out; - int err; - const char *dir; - const char *const *env; - unsigned no_stdin:1; - unsigned no_stdout:1; - unsigned no_stderr:1; - unsigned perf_cmd:1; /* if this is to be perf sub-command */ - unsigned stdout_to_stderr:1; - void (*preexec_cb)(void); -}; - -int start_command(struct child_process *); -int finish_command(struct child_process *); -int run_command(struct child_process *); - -extern int run_hook(const char *index_file, const char *name, ...); - -#define RUN_COMMAND_NO_STDIN 1 -#define RUN_PERF_CMD 2 /*If this is to be perf sub-command */ -#define RUN_COMMAND_STDOUT_TO_STDERR 4 -int run_command_v_opt(const char **argv, int opt); - -/* - * env (the environment) is to be formatted like environ: "VAR=VALUE". - * To unset an environment variable use just "VAR". - */ -int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); - -/* - * The purpose of the following functions is to feed a pipe by running - * a function asynchronously and providing output that the caller reads. - * - * It is expected that no synchronization and mutual exclusion between - * the caller and the feed function is necessary so that the function - * can run in a thread without interfering with the caller. - */ -struct async { - /* - * proc writes to fd and closes it; - * returns 0 on success, non-zero on failure - */ - int (*proc)(int fd, void *data); - void *data; - int out; /* caller reads from here and closes it */ -#ifndef __MINGW32__ - pid_t pid; -#else - HANDLE tid; - int fd_for_proc; -#endif -}; - -int start_async(struct async *async); -int finish_async(struct async *async); - -#endif diff --git a/Documentation/perf_counter/strbuf.c b/Documentation/perf_counter/strbuf.c deleted file mode 100644 index eaba093..0000000 --- a/Documentation/perf_counter/strbuf.c +++ /dev/null @@ -1,359 +0,0 @@ -#include "cache.h" - -int prefixcmp(const char *str, const char *prefix) -{ - for (; ; str++, prefix++) - if (!*prefix) - return 0; - else if (*str != *prefix) - return (unsigned char)*prefix - (unsigned char)*str; -} - -/* - * Used as the default ->buf value, so that people can always assume - * buf is non NULL and ->buf is NUL terminated even for a freshly - * initialized strbuf. - */ -char strbuf_slopbuf[1]; - -void strbuf_init(struct strbuf *sb, size_t hint) -{ - sb->alloc = sb->len = 0; - sb->buf = strbuf_slopbuf; - if (hint) - strbuf_grow(sb, hint); -} - -void strbuf_release(struct strbuf *sb) -{ - if (sb->alloc) { - free(sb->buf); - strbuf_init(sb, 0); - } -} - -char *strbuf_detach(struct strbuf *sb, size_t *sz) -{ - char *res = sb->alloc ? sb->buf : NULL; - if (sz) - *sz = sb->len; - strbuf_init(sb, 0); - return res; -} - -void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) -{ - strbuf_release(sb); - sb->buf = buf; - sb->len = len; - sb->alloc = alloc; - strbuf_grow(sb, 0); - sb->buf[sb->len] = '\0'; -} - -void strbuf_grow(struct strbuf *sb, size_t extra) -{ - if (sb->len + extra + 1 <= sb->len) - die("you want to use way too much memory"); - if (!sb->alloc) - sb->buf = NULL; - ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); -} - -void strbuf_trim(struct strbuf *sb) -{ - char *b = sb->buf; - while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) - sb->len--; - while (sb->len > 0 && isspace(*b)) { - b++; - sb->len--; - } - memmove(sb->buf, b, sb->len); - sb->buf[sb->len] = '\0'; -} -void strbuf_rtrim(struct strbuf *sb) -{ - while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) - sb->len--; - sb->buf[sb->len] = '\0'; -} - -void strbuf_ltrim(struct strbuf *sb) -{ - char *b = sb->buf; - while (sb->len > 0 && isspace(*b)) { - b++; - sb->len--; - } - memmove(sb->buf, b, sb->len); - sb->buf[sb->len] = '\0'; -} - -void strbuf_tolower(struct strbuf *sb) -{ - int i; - for (i = 0; i < sb->len; i++) - sb->buf[i] = tolower(sb->buf[i]); -} - -struct strbuf **strbuf_split(const struct strbuf *sb, int delim) -{ - int alloc = 2, pos = 0; - char *n, *p; - struct strbuf **ret; - struct strbuf *t; - - ret = calloc(alloc, sizeof(struct strbuf *)); - p = n = sb->buf; - while (n < sb->buf + sb->len) { - int len; - n = memchr(n, delim, sb->len - (n - sb->buf)); - if (pos + 1 >= alloc) { - alloc = alloc * 2; - ret = realloc(ret, sizeof(struct strbuf *) * alloc); - } - if (!n) - n = sb->buf + sb->len - 1; - len = n - p + 1; - t = malloc(sizeof(struct strbuf)); - strbuf_init(t, len); - strbuf_add(t, p, len); - ret[pos] = t; - ret[++pos] = NULL; - p = ++n; - } - return ret; -} - -void strbuf_list_free(struct strbuf **sbs) -{ - struct strbuf **s = sbs; - - while (*s) { - strbuf_release(*s); - free(*s++); - } - free(sbs); -} - -int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) -{ - int len = a->len < b->len ? a->len: b->len; - int cmp = memcmp(a->buf, b->buf, len); - if (cmp) - return cmp; - return a->len < b->len ? -1: a->len != b->len; -} - -void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, - const void *data, size_t dlen) -{ - if (pos + len < pos) - die("you want to use way too much memory"); - if (pos > sb->len) - die("`pos' is too far after the end of the buffer"); - if (pos + len > sb->len) - die("`pos + len' is too far after the end of the buffer"); - - if (dlen >= len) - strbuf_grow(sb, dlen - len); - memmove(sb->buf + pos + dlen, - sb->buf + pos + len, - sb->len - pos - len); - memcpy(sb->buf + pos, data, dlen); - strbuf_setlen(sb, sb->len + dlen - len); -} - -void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len) -{ - strbuf_splice(sb, pos, 0, data, len); -} - -void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) -{ - strbuf_splice(sb, pos, len, NULL, 0); -} - -void strbuf_add(struct strbuf *sb, const void *data, size_t len) -{ - strbuf_grow(sb, len); - memcpy(sb->buf + sb->len, data, len); - strbuf_setlen(sb, sb->len + len); -} - -void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) -{ - strbuf_grow(sb, len); - memcpy(sb->buf + sb->len, sb->buf + pos, len); - strbuf_setlen(sb, sb->len + len); -} - -void strbuf_addf(struct strbuf *sb, const char *fmt, ...) -{ - int len; - va_list ap; - - if (!strbuf_avail(sb)) - strbuf_grow(sb, 64); - va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); - if (len < 0) - die("your vsnprintf is broken"); - if (len > strbuf_avail(sb)) { - strbuf_grow(sb, len); - va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); - if (len > strbuf_avail(sb)) { - die("this should not happen, your snprintf is broken"); - } - } - strbuf_setlen(sb, sb->len + len); -} - -void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, - void *context) -{ - for (;;) { - const char *percent; - size_t consumed; - - percent = strchrnul(format, '%'); - strbuf_add(sb, format, percent - format); - if (!*percent) - break; - format = percent + 1; - - consumed = fn(sb, format, context); - if (consumed) - format += consumed; - else - strbuf_addch(sb, '%'); - } -} - -size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, - void *context) -{ - struct strbuf_expand_dict_entry *e = context; - size_t len; - - for (; e->placeholder && (len = strlen(e->placeholder)); e++) { - if (!strncmp(placeholder, e->placeholder, len)) { - if (e->value) - strbuf_addstr(sb, e->value); - return len; - } - } - return 0; -} - -size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) -{ - size_t res; - size_t oldalloc = sb->alloc; - - strbuf_grow(sb, size); - res = fread(sb->buf + sb->len, 1, size, f); - if (res > 0) - strbuf_setlen(sb, sb->len + res); - else if (res < 0 && oldalloc == 0) - strbuf_release(sb); - return res; -} - -ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) -{ - size_t oldlen = sb->len; - size_t oldalloc = sb->alloc; - - strbuf_grow(sb, hint ? hint : 8192); - for (;;) { - ssize_t cnt; - - cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); - if (cnt < 0) { - if (oldalloc == 0) - strbuf_release(sb); - else - strbuf_setlen(sb, oldlen); - return -1; - } - if (!cnt) - break; - sb->len += cnt; - strbuf_grow(sb, 8192); - } - - sb->buf[sb->len] = '\0'; - return sb->len - oldlen; -} - -#define STRBUF_MAXLINK (2*PATH_MAX) - -int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) -{ - size_t oldalloc = sb->alloc; - - if (hint < 32) - hint = 32; - - while (hint < STRBUF_MAXLINK) { - int len; - - strbuf_grow(sb, hint); - len = readlink(path, sb->buf, hint); - if (len < 0) { - if (errno != ERANGE) - break; - } else if (len < hint) { - strbuf_setlen(sb, len); - return 0; - } - - /* .. the buffer was too small - try again */ - hint *= 2; - } - if (oldalloc == 0) - strbuf_release(sb); - return -1; -} - -int strbuf_getline(struct strbuf *sb, FILE *fp, int term) -{ - int ch; - - strbuf_grow(sb, 0); - if (feof(fp)) - return EOF; - - strbuf_reset(sb); - while ((ch = fgetc(fp)) != EOF) { - if (ch == term) - break; - strbuf_grow(sb, 1); - sb->buf[sb->len++] = ch; - } - if (ch == EOF && sb->len == 0) - return EOF; - - sb->buf[sb->len] = '\0'; - return 0; -} - -int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) -{ - int fd, len; - - fd = open(path, O_RDONLY); - if (fd < 0) - return -1; - len = strbuf_read(sb, fd, hint); - close(fd); - if (len < 0) - return -1; - - return len; -} diff --git a/Documentation/perf_counter/strbuf.h b/Documentation/perf_counter/strbuf.h deleted file mode 100644 index 9ee908a..0000000 --- a/Documentation/perf_counter/strbuf.h +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef STRBUF_H -#define STRBUF_H - -/* - * Strbuf's can be use in many ways: as a byte array, or to store arbitrary - * long, overflow safe strings. - * - * Strbufs has some invariants that are very important to keep in mind: - * - * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to - * build complex strings/buffers whose final size isn't easily known. - * - * It is NOT legal to copy the ->buf pointer away. - * `strbuf_detach' is the operation that detachs a buffer from its shell - * while keeping the shell valid wrt its invariants. - * - * 2. the ->buf member is a byte array that has at least ->len + 1 bytes - * allocated. The extra byte is used to store a '\0', allowing the ->buf - * member to be a valid C-string. Every strbuf function ensure this - * invariant is preserved. - * - * Note that it is OK to "play" with the buffer directly if you work it - * that way: - * - * strbuf_grow(sb, SOME_SIZE); - * ... Here, the memory array starting at sb->buf, and of length - * ... strbuf_avail(sb) is all yours, and you are sure that - * ... strbuf_avail(sb) is at least SOME_SIZE. - * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); - * - * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). - * - * Doing so is safe, though if it has to be done in many places, adding the - * missing API to the strbuf module is the way to go. - * - * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 - * even if it's true in the current implementation. Alloc is somehow a - * "private" member that should not be messed with. - */ - -#include - -extern char strbuf_slopbuf[]; -struct strbuf { - size_t alloc; - size_t len; - char *buf; -}; - -#define STRBUF_INIT { 0, 0, strbuf_slopbuf } - -/*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *, size_t); -extern void strbuf_release(struct strbuf *); -extern char *strbuf_detach(struct strbuf *, size_t *); -extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); -static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { - struct strbuf tmp = *a; - *a = *b; - *b = tmp; -} - -/*----- strbuf size related -----*/ -static inline size_t strbuf_avail(const struct strbuf *sb) { - return sb->alloc ? sb->alloc - sb->len - 1 : 0; -} - -extern void strbuf_grow(struct strbuf *, size_t); - -static inline void strbuf_setlen(struct strbuf *sb, size_t len) { - if (!sb->alloc) - strbuf_grow(sb, 0); - assert(len < sb->alloc); - sb->len = len; - sb->buf[len] = '\0'; -} -#define strbuf_reset(sb) strbuf_setlen(sb, 0) - -/*----- content related -----*/ -extern void strbuf_trim(struct strbuf *); -extern void strbuf_rtrim(struct strbuf *); -extern void strbuf_ltrim(struct strbuf *); -extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); -extern void strbuf_tolower(struct strbuf *); - -extern struct strbuf **strbuf_split(const struct strbuf *, int delim); -extern void strbuf_list_free(struct strbuf **); - -/*----- add data in your buffer -----*/ -static inline void strbuf_addch(struct strbuf *sb, int c) { - strbuf_grow(sb, 1); - sb->buf[sb->len++] = c; - sb->buf[sb->len] = '\0'; -} - -extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); -extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); - -/* splice pos..pos+len with given data */ -extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, - const void *, size_t); - -extern void strbuf_add(struct strbuf *, const void *, size_t); -static inline void strbuf_addstr(struct strbuf *sb, const char *s) { - strbuf_add(sb, s, strlen(s)); -} -static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { - strbuf_add(sb, sb2->buf, sb2->len); -} -extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); - -typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); -extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); -struct strbuf_expand_dict_entry { - const char *placeholder; - const char *value; -}; -extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); - -__attribute__((format(printf,2,3))) -extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); - -extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); -/* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); -extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); -extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); - -extern int strbuf_getline(struct strbuf *, FILE *, int); - -extern void stripspace(struct strbuf *buf, int skip_comments); -extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); - -extern int strbuf_branchname(struct strbuf *sb, const char *name); -extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); - -#endif /* STRBUF_H */ diff --git a/Documentation/perf_counter/usage.c b/Documentation/perf_counter/usage.c deleted file mode 100644 index 7a10421..0000000 --- a/Documentation/perf_counter/usage.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * GIT - The information manager from hell - * - * Copyright (C) Linus Torvalds, 2005 - */ -#include "util.h" - -static void report(const char *prefix, const char *err, va_list params) -{ - char msg[1024]; - vsnprintf(msg, sizeof(msg), err, params); - fprintf(stderr, "%s%s\n", prefix, msg); -} - -static NORETURN void usage_builtin(const char *err) -{ - fprintf(stderr, "usage: %s\n", err); - exit(129); -} - -static NORETURN void die_builtin(const char *err, va_list params) -{ - report("fatal: ", err, params); - exit(128); -} - -static void error_builtin(const char *err, va_list params) -{ - report("error: ", err, params); -} - -static void warn_builtin(const char *warn, va_list params) -{ - report("warning: ", warn, params); -} - -/* If we are in a dlopen()ed .so write to a global variable would segfault - * (ugh), so keep things static. */ -static void (*usage_routine)(const char *err) NORETURN = usage_builtin; -static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; -static void (*error_routine)(const char *err, va_list params) = error_builtin; -static void (*warn_routine)(const char *err, va_list params) = warn_builtin; - -void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) -{ - die_routine = routine; -} - -void usage(const char *err) -{ - usage_routine(err); -} - -void die(const char *err, ...) -{ - va_list params; - - va_start(params, err); - die_routine(err, params); - va_end(params); -} - -int error(const char *err, ...) -{ - va_list params; - - va_start(params, err); - error_routine(err, params); - va_end(params); - return -1; -} - -void warning(const char *warn, ...) -{ - va_list params; - - va_start(params, warn); - warn_routine(warn, params); - va_end(params); -} diff --git a/Documentation/perf_counter/util.h b/Documentation/perf_counter/util.h deleted file mode 100644 index 36e40c3..0000000 --- a/Documentation/perf_counter/util.h +++ /dev/null @@ -1,408 +0,0 @@ -#ifndef GIT_COMPAT_UTIL_H -#define GIT_COMPAT_UTIL_H - -#define _FILE_OFFSET_BITS 64 - -#ifndef FLEX_ARRAY -/* - * See if our compiler is known to support flexible array members. - */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) -# define FLEX_ARRAY /* empty */ -#elif defined(__GNUC__) -# if (__GNUC__ >= 3) -# define FLEX_ARRAY /* empty */ -# else -# define FLEX_ARRAY 0 /* older GNU extension */ -# endif -#endif - -/* - * Otherwise, default to safer but a bit wasteful traditional style - */ -#ifndef FLEX_ARRAY -# define FLEX_ARRAY 1 -#endif -#endif - -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) - -#ifdef __GNUC__ -#define TYPEOF(x) (__typeof__(x)) -#else -#define TYPEOF(x) -#endif - -#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) -#define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ - -/* Approximation of the length of the decimal representation of this type. */ -#define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) - -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) -#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ -#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ -#endif -#define _ALL_SOURCE 1 -#define _GNU_SOURCE 1 -#define _BSD_SOURCE 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifndef __MINGW32__ -#include -#include -#include -#include -#ifndef NO_SYS_SELECT_H -#include -#endif -#include -#include -#include -#include -#include -#include -#if defined(__CYGWIN__) -#undef _XOPEN_SOURCE -#include -#define _XOPEN_SOURCE 600 -#include "compat/cygwin.h" -#else -#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ -#include -#define _ALL_SOURCE 1 -#endif -#else /* __MINGW32__ */ -/* pull in Windows compatibility stuff */ -#include "compat/mingw.h" -#endif /* __MINGW32__ */ - -#ifndef NO_ICONV -#include -#endif - -#ifndef NO_OPENSSL -#include -#include -#endif - -/* On most systems would have given us this, but - * not on some systems (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -#ifndef PRIuMAX -#define PRIuMAX "llu" -#endif - -#ifndef PRIu32 -#define PRIu32 "u" -#endif - -#ifndef PRIx32 -#define PRIx32 "x" -#endif - -#ifndef PATH_SEP -#define PATH_SEP ':' -#endif - -#ifndef STRIP_EXTENSION -#define STRIP_EXTENSION "" -#endif - -#ifndef has_dos_drive_prefix -#define has_dos_drive_prefix(path) 0 -#endif - -#ifndef is_dir_sep -#define is_dir_sep(c) ((c) == '/') -#endif - -#ifdef __GNUC__ -#define NORETURN __attribute__((__noreturn__)) -#else -#define NORETURN -#ifndef __attribute__ -#define __attribute__(x) -#endif -#endif - -/* General helper functions */ -extern void usage(const char *err) NORETURN; -extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); - -extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); - -extern int prefixcmp(const char *str, const char *prefix); -extern time_t tm_to_time_t(const struct tm *tm); - -static inline const char *skip_prefix(const char *str, const char *prefix) -{ - size_t len = strlen(prefix); - return strncmp(str, prefix, len) ? NULL : str + len; -} - -#if defined(NO_MMAP) || defined(USE_WIN32_MMAP) - -#ifndef PROT_READ -#define PROT_READ 1 -#define PROT_WRITE 2 -#define MAP_PRIVATE 1 -#define MAP_FAILED ((void*)-1) -#endif - -#define mmap git_mmap -#define munmap git_munmap -extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); -extern int git_munmap(void *start, size_t length); - -#else /* NO_MMAP || USE_WIN32_MMAP */ - -#include - -#endif /* NO_MMAP || USE_WIN32_MMAP */ - -#ifdef NO_MMAP - -/* This value must be multiple of (pagesize * 2) */ -#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) - -#else /* NO_MMAP */ - -/* This value must be multiple of (pagesize * 2) */ -#define DEFAULT_PACKED_GIT_WINDOW_SIZE \ - (sizeof(void*) >= 8 \ - ? 1 * 1024 * 1024 * 1024 \ - : 32 * 1024 * 1024) - -#endif /* NO_MMAP */ - -#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT -#define on_disk_bytes(st) ((st).st_size) -#else -#define on_disk_bytes(st) ((st).st_blocks * 512) -#endif - -#define DEFAULT_PACKED_GIT_LIMIT \ - ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) - -#ifdef NO_PREAD -#define pread git_pread -extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); -#endif -/* - * Forward decl that will remind us if its twin in cache.h changes. - * This function is used in compat/pread.c. But we can't include - * cache.h there. - */ -extern ssize_t read_in_full(int fd, void *buf, size_t count); - -#ifdef NO_SETENV -#define setenv gitsetenv -extern int gitsetenv(const char *, const char *, int); -#endif - -#ifdef NO_MKDTEMP -#define mkdtemp gitmkdtemp -extern char *gitmkdtemp(char *); -#endif - -#ifdef NO_UNSETENV -#define unsetenv gitunsetenv -extern void gitunsetenv(const char *); -#endif - -#ifdef NO_STRCASESTR -#define strcasestr gitstrcasestr -extern char *gitstrcasestr(const char *haystack, const char *needle); -#endif - -#ifdef NO_STRLCPY -#define strlcpy gitstrlcpy -extern size_t gitstrlcpy(char *, const char *, size_t); -#endif - -#ifdef NO_STRTOUMAX -#define strtoumax gitstrtoumax -extern uintmax_t gitstrtoumax(const char *, char **, int); -#endif - -#ifdef NO_HSTRERROR -#define hstrerror githstrerror -extern const char *githstrerror(int herror); -#endif - -#ifdef NO_MEMMEM -#define memmem gitmemmem -void *gitmemmem(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen); -#endif - -#ifdef FREAD_READS_DIRECTORIES -#ifdef fopen -#undef fopen -#endif -#define fopen(a,b) git_fopen(a,b) -extern FILE *git_fopen(const char*, const char*); -#endif - -#ifdef SNPRINTF_RETURNS_BOGUS -#define snprintf git_snprintf -extern int git_snprintf(char *str, size_t maxsize, - const char *format, ...); -#define vsnprintf git_vsnprintf -extern int git_vsnprintf(char *str, size_t maxsize, - const char *format, va_list ap); -#endif - -#ifdef __GLIBC_PREREQ -#if __GLIBC_PREREQ(2, 1) -#define HAVE_STRCHRNUL -#endif -#endif - -#ifndef HAVE_STRCHRNUL -#define strchrnul gitstrchrnul -static inline char *gitstrchrnul(const char *s, int c) -{ - while (*s && *s != c) - s++; - return (char *)s; -} -#endif - -/* - * Wrappers: - */ -extern char *xstrdup(const char *str); -extern void *xmalloc(size_t size); -extern void *xmemdupz(const void *data, size_t len); -extern char *xstrndup(const char *str, size_t len); -extern void *xrealloc(void *ptr, size_t size); -extern void *xcalloc(size_t nmemb, size_t size); -extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); -extern ssize_t xread(int fd, void *buf, size_t len); -extern ssize_t xwrite(int fd, const void *buf, size_t len); -extern int xdup(int fd); -extern FILE *xfdopen(int fd, const char *mode); -static inline size_t xsize_t(off_t len) -{ - return (size_t)len; -} - -static inline int has_extension(const char *filename, const char *ext) -{ - size_t len = strlen(filename); - size_t extlen = strlen(ext); - return len > extlen && !memcmp(filename + len - extlen, ext, extlen); -} - -/* Sane ctype - no locale, and works with signed chars */ -#undef isascii -#undef isspace -#undef isdigit -#undef isalpha -#undef isalnum -#undef tolower -#undef toupper -extern unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) -#define isascii(x) (((x) & ~0x7f) == 0) -#define isspace(x) sane_istest(x,GIT_SPACE) -#define isdigit(x) sane_istest(x,GIT_DIGIT) -#define isalpha(x) sane_istest(x,GIT_ALPHA) -#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) -#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) -#define tolower(x) sane_case((unsigned char)(x), 0x20) -#define toupper(x) sane_case((unsigned char)(x), 0) - -static inline int sane_case(int x, int high) -{ - if (sane_istest(x, GIT_ALPHA)) - x = (x & ~0x20) | high; - return x; -} - -static inline int strtoul_ui(char const *s, int base, unsigned int *result) -{ - unsigned long ul; - char *p; - - errno = 0; - ul = strtoul(s, &p, base); - if (errno || *p || p == s || (unsigned int) ul != ul) - return -1; - *result = ul; - return 0; -} - -static inline int strtol_i(char const *s, int base, int *result) -{ - long ul; - char *p; - - errno = 0; - ul = strtol(s, &p, base); - if (errno || *p || p == s || (int) ul != ul) - return -1; - *result = ul; - return 0; -} - -#ifdef INTERNAL_QSORT -void git_qsort(void *base, size_t nmemb, size_t size, - int(*compar)(const void *, const void *)); -#define qsort git_qsort -#endif - -#ifndef DIR_HAS_BSD_GROUP_SEMANTICS -# define FORCE_DIR_SET_GID S_ISGID -#else -# define FORCE_DIR_SET_GID 0 -#endif - -#ifdef NO_NSEC -#undef USE_NSEC -#define ST_CTIME_NSEC(st) 0 -#define ST_MTIME_NSEC(st) 0 -#else -#ifdef USE_ST_TIMESPEC -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) -#else -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) -#endif -#endif - -#endif diff --git a/Documentation/perf_counter/util/PERF-VERSION-GEN b/Documentation/perf_counter/util/PERF-VERSION-GEN new file mode 100755 index 0000000..c561d15 --- /dev/null +++ b/Documentation/perf_counter/util/PERF-VERSION-GEN @@ -0,0 +1,42 @@ +#!/bin/sh + +GVF=PERF-VERSION-FILE +DEF_VER=v0.0.1.PERF + +LF=' +' + +# First see if there is a version file (included in release tarballs), +# then try git-describe, then default. +if test -f version +then + VN=$(cat version) || VN="$DEF_VER" +elif test -d .git -o -f .git && + VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + case "$VN" in + *$LF*) (exit 1) ;; + v[0-9]*) + git update-index -q --refresh + test -z "$(git diff-index --name-only HEAD --)" || + VN="$VN-dirty" ;; + esac +then + VN=$(echo "$VN" | sed -e 's/-/./g'); +else + VN="$DEF_VER" +fi + +VN=$(expr "$VN" : v*'\(.*\)') + +if test -r $GVF +then + VC=$(sed -e 's/^PERF_VERSION = //' <$GVF) +else + VC=unset +fi +test "$VN" = "$VC" || { + echo >&2 "PERF_VERSION = $VN" + echo "PERF_VERSION = $VN" >$GVF +} + + diff --git a/Documentation/perf_counter/util/abspath.c b/Documentation/perf_counter/util/abspath.c new file mode 100644 index 0000000..649f34f --- /dev/null +++ b/Documentation/perf_counter/util/abspath.c @@ -0,0 +1,117 @@ +#include "cache.h" + +/* + * Do not use this for inspecting *tracked* content. When path is a + * symlink to a directory, we do not want to say it is a directory when + * dealing with tracked content in the working tree. + */ +int is_directory(const char *path) +{ + struct stat st; + return (!stat(path, &st) && S_ISDIR(st.st_mode)); +} + +/* We allow "recursive" symbolic links. Only within reason, though. */ +#define MAXDEPTH 5 + +const char *make_absolute_path(const char *path) +{ + static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1]; + char cwd[1024] = ""; + int buf_index = 1, len; + + int depth = MAXDEPTH; + char *last_elem = NULL; + struct stat st; + + if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) + die ("Too long path: %.*s", 60, path); + + while (depth--) { + if (!is_directory(buf)) { + char *last_slash = strrchr(buf, '/'); + if (last_slash) { + *last_slash = '\0'; + last_elem = xstrdup(last_slash + 1); + } else { + last_elem = xstrdup(buf); + *buf = '\0'; + } + } + + if (*buf) { + if (!*cwd && !getcwd(cwd, sizeof(cwd))) + die ("Could not get current working directory"); + + if (chdir(buf)) + die ("Could not switch to '%s'", buf); + } + if (!getcwd(buf, PATH_MAX)) + die ("Could not get current working directory"); + + if (last_elem) { + int len = strlen(buf); + if (len + strlen(last_elem) + 2 > PATH_MAX) + die ("Too long path name: '%s/%s'", + buf, last_elem); + buf[len] = '/'; + strcpy(buf + len + 1, last_elem); + free(last_elem); + last_elem = NULL; + } + + if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) { + len = readlink(buf, next_buf, PATH_MAX); + if (len < 0) + die ("Invalid symlink: %s", buf); + if (PATH_MAX <= len) + die("symbolic link too long: %s", buf); + next_buf[len] = '\0'; + buf = next_buf; + buf_index = 1 - buf_index; + next_buf = bufs[buf_index]; + } else + break; + } + + if (*cwd && chdir(cwd)) + die ("Could not change back to '%s'", cwd); + + return buf; +} + +static const char *get_pwd_cwd(void) +{ + static char cwd[PATH_MAX + 1]; + char *pwd; + struct stat cwd_stat, pwd_stat; + if (getcwd(cwd, PATH_MAX) == NULL) + return NULL; + pwd = getenv("PWD"); + if (pwd && strcmp(pwd, cwd)) { + stat(cwd, &cwd_stat); + if (!stat(pwd, &pwd_stat) && + pwd_stat.st_dev == cwd_stat.st_dev && + pwd_stat.st_ino == cwd_stat.st_ino) { + strlcpy(cwd, pwd, PATH_MAX); + } + } + return cwd; +} + +const char *make_nonrelative_path(const char *path) +{ + static char buf[PATH_MAX + 1]; + + if (is_absolute_path(path)) { + if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) + die("Too long path: %.*s", 60, path); + } else { + const char *cwd = get_pwd_cwd(); + if (!cwd) + die("Cannot determine the current working directory"); + if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) + die("Too long path: %.*s", 60, path); + } + return buf; +} diff --git a/Documentation/perf_counter/util/alias.c b/Documentation/perf_counter/util/alias.c new file mode 100644 index 0000000..9b3dd2b --- /dev/null +++ b/Documentation/perf_counter/util/alias.c @@ -0,0 +1,77 @@ +#include "cache.h" + +static const char *alias_key; +static char *alias_val; + +static int alias_lookup_cb(const char *k, const char *v, void *cb) +{ + if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { + if (!v) + return config_error_nonbool(k); + alias_val = strdup(v); + return 0; + } + return 0; +} + +char *alias_lookup(const char *alias) +{ + alias_key = alias; + alias_val = NULL; + perf_config(alias_lookup_cb, NULL); + return alias_val; +} + +int split_cmdline(char *cmdline, const char ***argv) +{ + int src, dst, count = 0, size = 16; + char quoted = 0; + + *argv = malloc(sizeof(char*) * size); + + /* split alias_string */ + (*argv)[count++] = cmdline; + for (src = dst = 0; cmdline[src];) { + char c = cmdline[src]; + if (!quoted && isspace(c)) { + cmdline[dst++] = 0; + while (cmdline[++src] + && isspace(cmdline[src])) + ; /* skip */ + if (count >= size) { + size += 16; + *argv = realloc(*argv, sizeof(char*) * size); + } + (*argv)[count++] = cmdline + dst; + } else if (!quoted && (c == '\'' || c == '"')) { + quoted = c; + src++; + } else if (c == quoted) { + quoted = 0; + src++; + } else { + if (c == '\\' && quoted != '\'') { + src++; + c = cmdline[src]; + if (!c) { + free(*argv); + *argv = NULL; + return error("cmdline ends with \\"); + } + } + cmdline[dst++] = c; + src++; + } + } + + cmdline[dst] = 0; + + if (quoted) { + free(*argv); + *argv = NULL; + return error("unclosed quote"); + } + + return count; +} + diff --git a/Documentation/perf_counter/util/cache.h b/Documentation/perf_counter/util/cache.h new file mode 100644 index 0000000..7108051 --- /dev/null +++ b/Documentation/perf_counter/util/cache.h @@ -0,0 +1,117 @@ +#ifndef CACHE_H +#define CACHE_H + +#include "util.h" +#include "strbuf.h" + +#define PERF_DIR_ENVIRONMENT "PERF_DIR" +#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" +#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" +#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY" +#define INDEX_ENVIRONMENT "PERF_INDEX_FILE" +#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE" +#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR" +#define CONFIG_ENVIRONMENT "PERF_CONFIG" +#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" +#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES" +#define PERFATTRIBUTES_FILE ".perfattributes" +#define INFOATTRIBUTES_FILE "info/attributes" +#define ATTRIBUTE_MACRO_PREFIX "[attr]" + +typedef int (*config_fn_t)(const char *, const char *, void *); +extern int perf_default_config(const char *, const char *, void *); +extern int perf_config_from_file(config_fn_t fn, const char *, void *); +extern int perf_config(config_fn_t fn, void *); +extern int perf_parse_ulong(const char *, unsigned long *); +extern int perf_config_int(const char *, const char *); +extern unsigned long perf_config_ulong(const char *, const char *); +extern int perf_config_bool_or_int(const char *, const char *, int *); +extern int perf_config_bool(const char *, const char *); +extern int perf_config_string(const char **, const char *, const char *); +extern int perf_config_set(const char *, const char *); +extern int perf_config_set_multivar(const char *, const char *, const char *, int); +extern int perf_config_rename_section(const char *, const char *); +extern const char *perf_etc_perfconfig(void); +extern int check_repository_format_version(const char *var, const char *value, void *cb); +extern int perf_config_system(void); +extern int perf_config_global(void); +extern int config_error_nonbool(const char *); +extern const char *config_exclusive_filename; + +#define MAX_PERFNAME (1000) +extern char perf_default_email[MAX_PERFNAME]; +extern char perf_default_name[MAX_PERFNAME]; +extern int user_ident_explicitly_given; + +extern const char *perf_log_output_encoding; +extern const char *perf_mailmap_file; + +/* IO helper functions */ +extern void maybe_flush_or_die(FILE *, const char *); +extern int copy_fd(int ifd, int ofd); +extern int copy_file(const char *dst, const char *src, int mode); +extern ssize_t read_in_full(int fd, void *buf, size_t count); +extern ssize_t write_in_full(int fd, const void *buf, size_t count); +extern void write_or_die(int fd, const void *buf, size_t count); +extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); +extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); +extern void fsync_or_die(int fd, const char *); + +/* pager.c */ +extern void setup_pager(void); +extern const char *pager_program; +extern int pager_in_use(void); +extern int pager_use_color; + +extern const char *editor_program; +extern const char *excludes_file; + +char *alias_lookup(const char *alias); +int split_cmdline(char *cmdline, const char ***argv); + +#define alloc_nr(x) (((x)+16)*3/2) + +/* + * Realloc the buffer pointed at by variable 'x' so that it can hold + * at least 'nr' entries; the number of entries currently allocated + * is 'alloc', using the standard growing factor alloc_nr() macro. + * + * DO NOT USE any expression with side-effect for 'x' or 'alloc'. + */ +#define ALLOC_GROW(x, nr, alloc) \ + do { \ + if ((nr) > alloc) { \ + if (alloc_nr(alloc) < (nr)) \ + alloc = (nr); \ + else \ + alloc = alloc_nr(alloc); \ + x = xrealloc((x), alloc * sizeof(*(x))); \ + } \ + } while(0) + + +static inline int is_absolute_path(const char *path) +{ + return path[0] == '/'; +} + +const char *make_absolute_path(const char *path); +const char *make_nonrelative_path(const char *path); +const char *make_relative_path(const char *abs, const char *base); +int normalize_path_copy(char *dst, const char *src); +int longest_ancestor_length(const char *path, const char *prefix_list); +char *strip_path_suffix(const char *path, const char *suffix); + +extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); + +extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) + __attribute__((format (printf, 3, 4))); +extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...) + __attribute__((format (printf, 3, 4))); +extern char *perf_pathdup(const char *fmt, ...) + __attribute__((format (printf, 1, 2))); + +extern size_t strlcpy(char *dest, const char *src, size_t size); + +#endif /* CACHE_H */ diff --git a/Documentation/perf_counter/util/config.c b/Documentation/perf_counter/util/config.c new file mode 100644 index 0000000..3dd13fa --- /dev/null +++ b/Documentation/perf_counter/util/config.c @@ -0,0 +1,873 @@ +/* + * GIT - The information manager from hell + * + * Copyright (C) Linus Torvalds, 2005 + * Copyright (C) Johannes Schindelin, 2005 + * + */ +#include "util.h" +#include "cache.h" +#include "exec_cmd.h" + +#define MAXNAME (256) + +static FILE *config_file; +static const char *config_file_name; +static int config_linenr; +static int config_file_eof; + +const char *config_exclusive_filename = NULL; + +static int get_next_char(void) +{ + int c; + FILE *f; + + c = '\n'; + if ((f = config_file) != NULL) { + c = fgetc(f); + if (c == '\r') { + /* DOS like systems */ + c = fgetc(f); + if (c != '\n') { + ungetc(c, f); + c = '\r'; + } + } + if (c == '\n') + config_linenr++; + if (c == EOF) { + config_file_eof = 1; + c = '\n'; + } + } + return c; +} + +static char *parse_value(void) +{ + static char value[1024]; + int quote = 0, comment = 0, len = 0, space = 0; + + for (;;) { + int c = get_next_char(); + if (len >= sizeof(value) - 1) + return NULL; + if (c == '\n') { + if (quote) + return NULL; + value[len] = 0; + return value; + } + if (comment) + continue; + if (isspace(c) && !quote) { + space = 1; + continue; + } + if (!quote) { + if (c == ';' || c == '#') { + comment = 1; + continue; + } + } + if (space) { + if (len) + value[len++] = ' '; + space = 0; + } + if (c == '\\') { + c = get_next_char(); + switch (c) { + case '\n': + continue; + case 't': + c = '\t'; + break; + case 'b': + c = '\b'; + break; + case 'n': + c = '\n'; + break; + /* Some characters escape as themselves */ + case '\\': case '"': + break; + /* Reject unknown escape sequences */ + default: + return NULL; + } + value[len++] = c; + continue; + } + if (c == '"') { + quote = 1-quote; + continue; + } + value[len++] = c; + } +} + +static inline int iskeychar(int c) +{ + return isalnum(c) || c == '-'; +} + +static int get_value(config_fn_t fn, void *data, char *name, unsigned int len) +{ + int c; + char *value; + + /* Get the full name */ + for (;;) { + c = get_next_char(); + if (config_file_eof) + break; + if (!iskeychar(c)) + break; + name[len++] = tolower(c); + if (len >= MAXNAME) + return -1; + } + name[len] = 0; + while (c == ' ' || c == '\t') + c = get_next_char(); + + value = NULL; + if (c != '\n') { + if (c != '=') + return -1; + value = parse_value(); + if (!value) + return -1; + } + return fn(name, value, data); +} + +static int get_extended_base_var(char *name, int baselen, int c) +{ + do { + if (c == '\n') + return -1; + c = get_next_char(); + } while (isspace(c)); + + /* We require the format to be '[base "extension"]' */ + if (c != '"') + return -1; + name[baselen++] = '.'; + + for (;;) { + int c = get_next_char(); + if (c == '\n') + return -1; + if (c == '"') + break; + if (c == '\\') { + c = get_next_char(); + if (c == '\n') + return -1; + } + name[baselen++] = c; + if (baselen > MAXNAME / 2) + return -1; + } + + /* Final ']' */ + if (get_next_char() != ']') + return -1; + return baselen; +} + +static int get_base_var(char *name) +{ + int baselen = 0; + + for (;;) { + int c = get_next_char(); + if (config_file_eof) + return -1; + if (c == ']') + return baselen; + if (isspace(c)) + return get_extended_base_var(name, baselen, c); + if (!iskeychar(c) && c != '.') + return -1; + if (baselen > MAXNAME / 2) + return -1; + name[baselen++] = tolower(c); + } +} + +static int perf_parse_file(config_fn_t fn, void *data) +{ + int comment = 0; + int baselen = 0; + static char var[MAXNAME]; + + /* U+FEFF Byte Order Mark in UTF8 */ + static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; + const unsigned char *bomptr = utf8_bom; + + for (;;) { + int c = get_next_char(); + if (bomptr && *bomptr) { + /* We are at the file beginning; skip UTF8-encoded BOM + * if present. Sane editors won't put this in on their + * own, but e.g. Windows Notepad will do it happily. */ + if ((unsigned char) c == *bomptr) { + bomptr++; + continue; + } else { + /* Do not tolerate partial BOM. */ + if (bomptr != utf8_bom) + break; + /* No BOM at file beginning. Cool. */ + bomptr = NULL; + } + } + if (c == '\n') { + if (config_file_eof) + return 0; + comment = 0; + continue; + } + if (comment || isspace(c)) + continue; + if (c == '#' || c == ';') { + comment = 1; + continue; + } + if (c == '[') { + baselen = get_base_var(var); + if (baselen <= 0) + break; + var[baselen++] = '.'; + var[baselen] = 0; + continue; + } + if (!isalpha(c)) + break; + var[baselen] = tolower(c); + if (get_value(fn, data, var, baselen+1) < 0) + break; + } + die("bad config file line %d in %s", config_linenr, config_file_name); +} + +static int parse_unit_factor(const char *end, unsigned long *val) +{ + if (!*end) + return 1; + else if (!strcasecmp(end, "k")) { + *val *= 1024; + return 1; + } + else if (!strcasecmp(end, "m")) { + *val *= 1024 * 1024; + return 1; + } + else if (!strcasecmp(end, "g")) { + *val *= 1024 * 1024 * 1024; + return 1; + } + return 0; +} + +static int perf_parse_long(const char *value, long *ret) +{ + if (value && *value) { + char *end; + long val = strtol(value, &end, 0); + unsigned long factor = 1; + if (!parse_unit_factor(end, &factor)) + return 0; + *ret = val * factor; + return 1; + } + return 0; +} + +int perf_parse_ulong(const char *value, unsigned long *ret) +{ + if (value && *value) { + char *end; + unsigned long val = strtoul(value, &end, 0); + if (!parse_unit_factor(end, &val)) + return 0; + *ret = val; + return 1; + } + return 0; +} + +static void die_bad_config(const char *name) +{ + if (config_file_name) + die("bad config value for '%s' in %s", name, config_file_name); + die("bad config value for '%s'", name); +} + +int perf_config_int(const char *name, const char *value) +{ + long ret = 0; + if (!perf_parse_long(value, &ret)) + die_bad_config(name); + return ret; +} + +unsigned long perf_config_ulong(const char *name, const char *value) +{ + unsigned long ret; + if (!perf_parse_ulong(value, &ret)) + die_bad_config(name); + return ret; +} + +int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) +{ + *is_bool = 1; + if (!value) + return 1; + if (!*value) + return 0; + if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on")) + return 1; + if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off")) + return 0; + *is_bool = 0; + return perf_config_int(name, value); +} + +int perf_config_bool(const char *name, const char *value) +{ + int discard; + return !!perf_config_bool_or_int(name, value, &discard); +} + +int perf_config_string(const char **dest, const char *var, const char *value) +{ + if (!value) + return config_error_nonbool(var); + *dest = strdup(value); + return 0; +} + +static int perf_default_core_config(const char *var, const char *value) +{ + /* Add other config variables here and to Documentation/config.txt. */ + return 0; +} + +int perf_default_config(const char *var, const char *value, void *dummy) +{ + if (!prefixcmp(var, "core.")) + return perf_default_core_config(var, value); + + /* Add other config variables here and to Documentation/config.txt. */ + return 0; +} + +int perf_config_from_file(config_fn_t fn, const char *filename, void *data) +{ + int ret; + FILE *f = fopen(filename, "r"); + + ret = -1; + if (f) { + config_file = f; + config_file_name = filename; + config_linenr = 1; + config_file_eof = 0; + ret = perf_parse_file(fn, data); + fclose(f); + config_file_name = NULL; + } + return ret; +} + +const char *perf_etc_perfconfig(void) +{ + static const char *system_wide; + if (!system_wide) + system_wide = system_path(ETC_PERFCONFIG); + return system_wide; +} + +static int perf_env_bool(const char *k, int def) +{ + const char *v = getenv(k); + return v ? perf_config_bool(k, v) : def; +} + +int perf_config_system(void) +{ + return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0); +} + +int perf_config_global(void) +{ + return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); +} + +int perf_config(config_fn_t fn, void *data) +{ + int ret = 0, found = 0; + char *repo_config = NULL; + const char *home = NULL; + + /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ + if (config_exclusive_filename) + return perf_config_from_file(fn, config_exclusive_filename, data); + if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { + ret += perf_config_from_file(fn, perf_etc_perfconfig(), + data); + found += 1; + } + + home = getenv("HOME"); + if (perf_config_global() && home) { + char *user_config = strdup(mkpath("%s/.perfconfig", home)); + if (!access(user_config, R_OK)) { + ret += perf_config_from_file(fn, user_config, data); + found += 1; + } + free(user_config); + } + + repo_config = perf_pathdup("config"); + if (!access(repo_config, R_OK)) { + ret += perf_config_from_file(fn, repo_config, data); + found += 1; + } + free(repo_config); + if (found == 0) + return -1; + return ret; +} + +/* + * Find all the stuff for perf_config_set() below. + */ + +#define MAX_MATCHES 512 + +static struct { + int baselen; + char* key; + int do_not_match; + regex_t* value_regex; + int multi_replace; + size_t offset[MAX_MATCHES]; + enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; + int seen; +} store; + +static int matches(const char* key, const char* value) +{ + return !strcmp(key, store.key) && + (store.value_regex == NULL || + (store.do_not_match ^ + !regexec(store.value_regex, value, 0, NULL, 0))); +} + +static int store_aux(const char* key, const char* value, void *cb) +{ + const char *ep; + size_t section_len; + + switch (store.state) { + case KEY_SEEN: + if (matches(key, value)) { + if (store.seen == 1 && store.multi_replace == 0) { + warning("%s has multiple values", key); + } else if (store.seen >= MAX_MATCHES) { + error("too many matches for %s", key); + return 1; + } + + store.offset[store.seen] = ftell(config_file); + store.seen++; + } + break; + case SECTION_SEEN: + /* + * What we are looking for is in store.key (both + * section and var), and its section part is baselen + * long. We found key (again, both section and var). + * We would want to know if this key is in the same + * section as what we are looking for. We already + * know we are in the same section as what should + * hold store.key. + */ + ep = strrchr(key, '.'); + section_len = ep - key; + + if ((section_len != store.baselen) || + memcmp(key, store.key, section_len+1)) { + store.state = SECTION_END_SEEN; + break; + } + + /* + * Do not increment matches: this is no match, but we + * just made sure we are in the desired section. + */ + store.offset[store.seen] = ftell(config_file); + /* fallthru */ + case SECTION_END_SEEN: + case START: + if (matches(key, value)) { + store.offset[store.seen] = ftell(config_file); + store.state = KEY_SEEN; + store.seen++; + } else { + if (strrchr(key, '.') - key == store.baselen && + !strncmp(key, store.key, store.baselen)) { + store.state = SECTION_SEEN; + store.offset[store.seen] = ftell(config_file); + } + } + } + return 0; +} + +static int store_write_section(int fd, const char* key) +{ + const char *dot; + int i, success; + struct strbuf sb = STRBUF_INIT; + + dot = memchr(key, '.', store.baselen); + if (dot) { + strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key); + for (i = dot - key + 1; i < store.baselen; i++) { + if (key[i] == '"' || key[i] == '\\') + strbuf_addch(&sb, '\\'); + strbuf_addch(&sb, key[i]); + } + strbuf_addstr(&sb, "\"]\n"); + } else { + strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); + } + + success = write_in_full(fd, sb.buf, sb.len) == sb.len; + strbuf_release(&sb); + + return success; +} + +static int store_write_pair(int fd, const char* key, const char* value) +{ + int i, success; + int length = strlen(key + store.baselen + 1); + const char *quote = ""; + struct strbuf sb = STRBUF_INIT; + + /* + * Check to see if the value needs to be surrounded with a dq pair. + * Note that problematic characters are always backslash-quoted; this + * check is about not losing leading or trailing SP and strings that + * follow beginning-of-comment characters (i.e. ';' and '#') by the + * configuration parser. + */ + if (value[0] == ' ') + quote = "\""; + for (i = 0; value[i]; i++) + if (value[i] == ';' || value[i] == '#') + quote = "\""; + if (i && value[i - 1] == ' ') + quote = "\""; + + strbuf_addf(&sb, "\t%.*s = %s", + length, key + store.baselen + 1, quote); + + for (i = 0; value[i]; i++) + switch (value[i]) { + case '\n': + strbuf_addstr(&sb, "\\n"); + break; + case '\t': + strbuf_addstr(&sb, "\\t"); + break; + case '"': + case '\\': + strbuf_addch(&sb, '\\'); + default: + strbuf_addch(&sb, value[i]); + break; + } + strbuf_addf(&sb, "%s\n", quote); + + success = write_in_full(fd, sb.buf, sb.len) == sb.len; + strbuf_release(&sb); + + return success; +} + +static ssize_t find_beginning_of_line(const char* contents, size_t size, + size_t offset_, int* found_bracket) +{ + size_t equal_offset = size, bracket_offset = size; + ssize_t offset; + +contline: + for (offset = offset_-2; offset > 0 + && contents[offset] != '\n'; offset--) + switch (contents[offset]) { + case '=': equal_offset = offset; break; + case ']': bracket_offset = offset; break; + } + if (offset > 0 && contents[offset-1] == '\\') { + offset_ = offset; + goto contline; + } + if (bracket_offset < equal_offset) { + *found_bracket = 1; + offset = bracket_offset+1; + } else + offset++; + + return offset; +} + +int perf_config_set(const char* key, const char* value) +{ + return perf_config_set_multivar(key, value, NULL, 0); +} + +/* + * If value==NULL, unset in (remove from) config, + * if value_regex!=NULL, disregard key/value pairs where value does not match. + * if multi_replace==0, nothing, or only one matching key/value is replaced, + * else all matching key/values (regardless how many) are removed, + * before the new pair is written. + * + * Returns 0 on success. + * + * This function does this: + * + * - it locks the config file by creating ".perf/config.lock" + * + * - it then parses the config using store_aux() as validator to find + * the position on the key/value pair to replace. If it is to be unset, + * it must be found exactly once. + * + * - the config file is mmap()ed and the part before the match (if any) is + * written to the lock file, then the changed part and the rest. + * + * - the config file is removed and the lock file rename()d to it. + * + */ +int perf_config_set_multivar(const char* key, const char* value, + const char* value_regex, int multi_replace) +{ + int i, dot; + int fd = -1, in_fd; + int ret = 0; + char* config_filename; + const char* last_dot = strrchr(key, '.'); + + if (config_exclusive_filename) + config_filename = strdup(config_exclusive_filename); + else + config_filename = perf_pathdup("config"); + + /* + * Since "key" actually contains the section name and the real + * key name separated by a dot, we have to know where the dot is. + */ + + if (last_dot == NULL) { + error("key does not contain a section: %s", key); + ret = 2; + goto out_free; + } + store.baselen = last_dot - key; + + store.multi_replace = multi_replace; + + /* + * Validate the key and while at it, lower case it for matching. + */ + store.key = malloc(strlen(key) + 1); + dot = 0; + for (i = 0; key[i]; i++) { + unsigned char c = key[i]; + if (c == '.') + dot = 1; + /* Leave the extended basename untouched.. */ + if (!dot || i > store.baselen) { + if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) { + error("invalid key: %s", key); + free(store.key); + ret = 1; + goto out_free; + } + c = tolower(c); + } else if (c == '\n') { + error("invalid key (newline): %s", key); + free(store.key); + ret = 1; + goto out_free; + } + store.key[i] = c; + } + store.key[i] = 0; + + /* + * If .perf/config does not exist yet, write a minimal version. + */ + in_fd = open(config_filename, O_RDONLY); + if ( in_fd < 0 ) { + free(store.key); + + if ( ENOENT != errno ) { + error("opening %s: %s", config_filename, + strerror(errno)); + ret = 3; /* same as "invalid config file" */ + goto out_free; + } + /* if nothing to unset, error out */ + if (value == NULL) { + ret = 5; + goto out_free; + } + + store.key = (char*)key; + if (!store_write_section(fd, key) || + !store_write_pair(fd, key, value)) + goto write_err_out; + } else { + struct stat st; + char* contents; + size_t contents_sz, copy_begin, copy_end; + int i, new_line = 0; + + if (value_regex == NULL) + store.value_regex = NULL; + else { + if (value_regex[0] == '!') { + store.do_not_match = 1; + value_regex++; + } else + store.do_not_match = 0; + + store.value_regex = (regex_t*)malloc(sizeof(regex_t)); + if (regcomp(store.value_regex, value_regex, + REG_EXTENDED)) { + error("invalid pattern: %s", value_regex); + free(store.value_regex); + ret = 6; + goto out_free; + } + } + + store.offset[0] = 0; + store.state = START; + store.seen = 0; + + /* + * After this, store.offset will contain the *end* offset + * of the last match, or remain at 0 if no match was found. + * As a side effect, we make sure to transform only a valid + * existing config file. + */ + if (perf_config_from_file(store_aux, config_filename, NULL)) { + error("invalid config file %s", config_filename); + free(store.key); + if (store.value_regex != NULL) { + regfree(store.value_regex); + free(store.value_regex); + } + ret = 3; + goto out_free; + } + + free(store.key); + if (store.value_regex != NULL) { + regfree(store.value_regex); + free(store.value_regex); + } + + /* if nothing to unset, or too many matches, error out */ + if ((store.seen == 0 && value == NULL) || + (store.seen > 1 && multi_replace == 0)) { + ret = 5; + goto out_free; + } + + fstat(in_fd, &st); + contents_sz = xsize_t(st.st_size); + contents = mmap(NULL, contents_sz, PROT_READ, + MAP_PRIVATE, in_fd, 0); + close(in_fd); + + if (store.seen == 0) + store.seen = 1; + + for (i = 0, copy_begin = 0; i < store.seen; i++) { + if (store.offset[i] == 0) { + store.offset[i] = copy_end = contents_sz; + } else if (store.state != KEY_SEEN) { + copy_end = store.offset[i]; + } else + copy_end = find_beginning_of_line( + contents, contents_sz, + store.offset[i]-2, &new_line); + + if (copy_end > 0 && contents[copy_end-1] != '\n') + new_line = 1; + + /* write the first part of the config */ + if (copy_end > copy_begin) { + if (write_in_full(fd, contents + copy_begin, + copy_end - copy_begin) < + copy_end - copy_begin) + goto write_err_out; + if (new_line && + write_in_full(fd, "\n", 1) != 1) + goto write_err_out; + } + copy_begin = store.offset[i]; + } + + /* write the pair (value == NULL means unset) */ + if (value != NULL) { + if (store.state == START) { + if (!store_write_section(fd, key)) + goto write_err_out; + } + if (!store_write_pair(fd, key, value)) + goto write_err_out; + } + + /* write the rest of the config */ + if (copy_begin < contents_sz) + if (write_in_full(fd, contents + copy_begin, + contents_sz - copy_begin) < + contents_sz - copy_begin) + goto write_err_out; + + munmap(contents, contents_sz); + } + + ret = 0; + +out_free: + free(config_filename); + return ret; + +write_err_out: + goto out_free; + +} + +/* + * Call this to report error for your variable that should not + * get a boolean value (i.e. "[my] var" means "true"). + */ +int config_error_nonbool(const char *var) +{ + return error("Missing value for '%s'", var); +} diff --git a/Documentation/perf_counter/util/ctype.c b/Documentation/perf_counter/util/ctype.c new file mode 100644 index 0000000..b90ec00 --- /dev/null +++ b/Documentation/perf_counter/util/ctype.c @@ -0,0 +1,26 @@ +/* + * Sane locale-independent, ASCII ctype. + * + * No surprises, and works with signed and unsigned chars. + */ +#include "cache.h" + +enum { + S = GIT_SPACE, + A = GIT_ALPHA, + D = GIT_DIGIT, + G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ + R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ +}; + +unsigned char sane_ctype[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ + S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ + D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ + A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ + 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ + A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ + /* Nothing in the 128.. range */ +}; diff --git a/Documentation/perf_counter/util/exec_cmd.c b/Documentation/perf_counter/util/exec_cmd.c new file mode 100644 index 0000000..d392922 --- /dev/null +++ b/Documentation/perf_counter/util/exec_cmd.c @@ -0,0 +1,165 @@ +#include "cache.h" +#include "exec_cmd.h" +#include "quote.h" +#define MAX_ARGS 32 + +extern char **environ; +static const char *argv_exec_path; +static const char *argv0_path; + +const char *system_path(const char *path) +{ +#ifdef RUNTIME_PREFIX + static const char *prefix; +#else + static const char *prefix = PREFIX; +#endif + struct strbuf d = STRBUF_INIT; + + if (is_absolute_path(path)) + return path; + +#ifdef RUNTIME_PREFIX + assert(argv0_path); + assert(is_absolute_path(argv0_path)); + + if (!prefix && + !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) && + !(prefix = strip_path_suffix(argv0_path, BINDIR)) && + !(prefix = strip_path_suffix(argv0_path, "perf"))) { + prefix = PREFIX; + fprintf(stderr, "RUNTIME_PREFIX requested, " + "but prefix computation failed. " + "Using static fallback '%s'.\n", prefix); + } +#endif + + strbuf_addf(&d, "%s/%s", prefix, path); + path = strbuf_detach(&d, NULL); + return path; +} + +const char *perf_extract_argv0_path(const char *argv0) +{ + const char *slash; + + if (!argv0 || !*argv0) + return NULL; + slash = argv0 + strlen(argv0); + + while (argv0 <= slash && !is_dir_sep(*slash)) + slash--; + + if (slash >= argv0) { + argv0_path = strndup(argv0, slash - argv0); + return slash + 1; + } + + return argv0; +} + +void perf_set_argv_exec_path(const char *exec_path) +{ + argv_exec_path = exec_path; + /* + * Propagate this setting to external programs. + */ + setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1); +} + + +/* Returns the highest-priority, location to look for perf programs. */ +const char *perf_exec_path(void) +{ + const char *env; + + if (argv_exec_path) + return argv_exec_path; + + env = getenv(EXEC_PATH_ENVIRONMENT); + if (env && *env) { + return env; + } + + return system_path(PERF_EXEC_PATH); +} + +static void add_path(struct strbuf *out, const char *path) +{ + if (path && *path) { + if (is_absolute_path(path)) + strbuf_addstr(out, path); + else + strbuf_addstr(out, make_nonrelative_path(path)); + + strbuf_addch(out, PATH_SEP); + } +} + +void setup_path(void) +{ + const char *old_path = getenv("PATH"); + struct strbuf new_path = STRBUF_INIT; + + add_path(&new_path, perf_exec_path()); + add_path(&new_path, argv0_path); + + if (old_path) + strbuf_addstr(&new_path, old_path); + else + strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); + + setenv("PATH", new_path.buf, 1); + + strbuf_release(&new_path); +} + +const char **prepare_perf_cmd(const char **argv) +{ + int argc; + const char **nargv; + + for (argc = 0; argv[argc]; argc++) + ; /* just counting */ + nargv = malloc(sizeof(*nargv) * (argc + 2)); + + nargv[0] = "perf"; + for (argc = 0; argv[argc]; argc++) + nargv[argc + 1] = argv[argc]; + nargv[argc + 1] = NULL; + return nargv; +} + +int execv_perf_cmd(const char **argv) { + const char **nargv = prepare_perf_cmd(argv); + + /* execvp() can only ever return if it fails */ + execvp("perf", (char **)nargv); + + free(nargv); + return -1; +} + + +int execl_perf_cmd(const char *cmd,...) +{ + int argc; + const char *argv[MAX_ARGS + 1]; + const char *arg; + va_list param; + + va_start(param, cmd); + argv[0] = cmd; + argc = 1; + while (argc < MAX_ARGS) { + arg = argv[argc++] = va_arg(param, char *); + if (!arg) + break; + } + va_end(param); + if (MAX_ARGS <= argc) + return error("too many args to run %s", cmd); + + argv[argc] = NULL; + return execv_perf_cmd(argv); +} diff --git a/Documentation/perf_counter/util/exec_cmd.h b/Documentation/perf_counter/util/exec_cmd.h new file mode 100644 index 0000000..effe25e --- /dev/null +++ b/Documentation/perf_counter/util/exec_cmd.h @@ -0,0 +1,13 @@ +#ifndef PERF_EXEC_CMD_H +#define PERF_EXEC_CMD_H + +extern void perf_set_argv_exec_path(const char *exec_path); +extern const char *perf_extract_argv0_path(const char *path); +extern const char *perf_exec_path(void); +extern void setup_path(void); +extern const char **prepare_perf_cmd(const char **argv); +extern int execv_perf_cmd(const char **argv); /* NULL terminated */ +extern int execl_perf_cmd(const char *cmd, ...); +extern const char *system_path(const char *path); + +#endif /* PERF_EXEC_CMD_H */ diff --git a/Documentation/perf_counter/util/generate-cmdlist.sh b/Documentation/perf_counter/util/generate-cmdlist.sh new file mode 100755 index 0000000..f06f6fd --- /dev/null +++ b/Documentation/perf_counter/util/generate-cmdlist.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +echo "/* Automatically generated by $0 */ +struct cmdname_help +{ + char name[16]; + char help[80]; +}; + +static struct cmdname_help common_cmds[] = {" + +sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "};" diff --git a/Documentation/perf_counter/util/help.c b/Documentation/perf_counter/util/help.c new file mode 100644 index 0000000..edde541 --- /dev/null +++ b/Documentation/perf_counter/util/help.c @@ -0,0 +1,366 @@ +#include "cache.h" +#include "../builtin.h" +#include "exec_cmd.h" +#include "levenshtein.h" +#include "help.h" + +/* most GUI terminals set COLUMNS (although some don't export it) */ +static int term_columns(void) +{ + char *col_string = getenv("COLUMNS"); + int n_cols; + + if (col_string && (n_cols = atoi(col_string)) > 0) + return n_cols; + +#ifdef TIOCGWINSZ + { + struct winsize ws; + if (!ioctl(1, TIOCGWINSZ, &ws)) { + if (ws.ws_col) + return ws.ws_col; + } + } +#endif + + return 80; +} + +void add_cmdname(struct cmdnames *cmds, const char *name, int len) +{ + struct cmdname *ent = malloc(sizeof(*ent) + len + 1); + + ent->len = len; + memcpy(ent->name, name, len); + ent->name[len] = 0; + + ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); + cmds->names[cmds->cnt++] = ent; +} + +static void clean_cmdnames(struct cmdnames *cmds) +{ + int i; + for (i = 0; i < cmds->cnt; ++i) + free(cmds->names[i]); + free(cmds->names); + cmds->cnt = 0; + cmds->alloc = 0; +} + +static int cmdname_compare(const void *a_, const void *b_) +{ + struct cmdname *a = *(struct cmdname **)a_; + struct cmdname *b = *(struct cmdname **)b_; + return strcmp(a->name, b->name); +} + +static void uniq(struct cmdnames *cmds) +{ + int i, j; + + if (!cmds->cnt) + return; + + for (i = j = 1; i < cmds->cnt; i++) + if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) + cmds->names[j++] = cmds->names[i]; + + cmds->cnt = j; +} + +void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) +{ + int ci, cj, ei; + int cmp; + + ci = cj = ei = 0; + while (ci < cmds->cnt && ei < excludes->cnt) { + cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); + if (cmp < 0) + cmds->names[cj++] = cmds->names[ci++]; + else if (cmp == 0) + ci++, ei++; + else if (cmp > 0) + ei++; + } + + while (ci < cmds->cnt) + cmds->names[cj++] = cmds->names[ci++]; + + cmds->cnt = cj; +} + +static void pretty_print_string_list(struct cmdnames *cmds, int longest) +{ + int cols = 1, rows; + int space = longest + 1; /* min 1 SP between words */ + int max_cols = term_columns() - 1; /* don't print *on* the edge */ + int i, j; + + if (space < max_cols) + cols = max_cols / space; + rows = (cmds->cnt + cols - 1) / cols; + + for (i = 0; i < rows; i++) { + printf(" "); + + for (j = 0; j < cols; j++) { + int n = j * rows + i; + int size = space; + if (n >= cmds->cnt) + break; + if (j == cols-1 || n + rows >= cmds->cnt) + size = 1; + printf("%-*s", size, cmds->names[n]->name); + } + putchar('\n'); + } +} + +static int is_executable(const char *name) +{ + struct stat st; + + if (stat(name, &st) || /* stat, not lstat */ + !S_ISREG(st.st_mode)) + return 0; + +#ifdef __MINGW32__ + /* cannot trust the executable bit, peek into the file instead */ + char buf[3] = { 0 }; + int n; + int fd = open(name, O_RDONLY); + st.st_mode &= ~S_IXUSR; + if (fd >= 0) { + n = read(fd, buf, 2); + if (n == 2) + /* DOS executables start with "MZ" */ + if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) + st.st_mode |= S_IXUSR; + close(fd); + } +#endif + return st.st_mode & S_IXUSR; +} + +static void list_commands_in_dir(struct cmdnames *cmds, + const char *path, + const char *prefix) +{ + int prefix_len; + DIR *dir = opendir(path); + struct dirent *de; + struct strbuf buf = STRBUF_INIT; + int len; + + if (!dir) + return; + if (!prefix) + prefix = "perf-"; + prefix_len = strlen(prefix); + + strbuf_addf(&buf, "%s/", path); + len = buf.len; + + while ((de = readdir(dir)) != NULL) { + int entlen; + + if (prefixcmp(de->d_name, prefix)) + continue; + + strbuf_setlen(&buf, len); + strbuf_addstr(&buf, de->d_name); + if (!is_executable(buf.buf)) + continue; + + entlen = strlen(de->d_name) - prefix_len; + if (has_extension(de->d_name, ".exe")) + entlen -= 4; + + add_cmdname(cmds, de->d_name + prefix_len, entlen); + } + closedir(dir); + strbuf_release(&buf); +} + +void load_command_list(const char *prefix, + struct cmdnames *main_cmds, + struct cmdnames *other_cmds) +{ + const char *env_path = getenv("PATH"); + const char *exec_path = perf_exec_path(); + + if (exec_path) { + list_commands_in_dir(main_cmds, exec_path, prefix); + qsort(main_cmds->names, main_cmds->cnt, + sizeof(*main_cmds->names), cmdname_compare); + uniq(main_cmds); + } + + if (env_path) { + char *paths, *path, *colon; + path = paths = strdup(env_path); + while (1) { + if ((colon = strchr(path, PATH_SEP))) + *colon = 0; + if (!exec_path || strcmp(path, exec_path)) + list_commands_in_dir(other_cmds, path, prefix); + + if (!colon) + break; + path = colon + 1; + } + free(paths); + + qsort(other_cmds->names, other_cmds->cnt, + sizeof(*other_cmds->names), cmdname_compare); + uniq(other_cmds); + } + exclude_cmds(other_cmds, main_cmds); +} + +void list_commands(const char *title, struct cmdnames *main_cmds, + struct cmdnames *other_cmds) +{ + int i, longest = 0; + + for (i = 0; i < main_cmds->cnt; i++) + if (longest < main_cmds->names[i]->len) + longest = main_cmds->names[i]->len; + for (i = 0; i < other_cmds->cnt; i++) + if (longest < other_cmds->names[i]->len) + longest = other_cmds->names[i]->len; + + if (main_cmds->cnt) { + const char *exec_path = perf_exec_path(); + printf("available %s in '%s'\n", title, exec_path); + printf("----------------"); + mput_char('-', strlen(title) + strlen(exec_path)); + putchar('\n'); + pretty_print_string_list(main_cmds, longest); + putchar('\n'); + } + + if (other_cmds->cnt) { + printf("%s available from elsewhere on your $PATH\n", title); + printf("---------------------------------------"); + mput_char('-', strlen(title)); + putchar('\n'); + pretty_print_string_list(other_cmds, longest); + putchar('\n'); + } +} + +int is_in_cmdlist(struct cmdnames *c, const char *s) +{ + int i; + for (i = 0; i < c->cnt; i++) + if (!strcmp(s, c->names[i]->name)) + return 1; + return 0; +} + +static int autocorrect; +static struct cmdnames aliases; + +static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) +{ + if (!strcmp(var, "help.autocorrect")) + autocorrect = perf_config_int(var,value); + /* Also use aliases for command lookup */ + if (!prefixcmp(var, "alias.")) + add_cmdname(&aliases, var + 6, strlen(var + 6)); + + return perf_default_config(var, value, cb); +} + +static int levenshtein_compare(const void *p1, const void *p2) +{ + const struct cmdname *const *c1 = p1, *const *c2 = p2; + const char *s1 = (*c1)->name, *s2 = (*c2)->name; + int l1 = (*c1)->len; + int l2 = (*c2)->len; + return l1 != l2 ? l1 - l2 : strcmp(s1, s2); +} + +static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) +{ + int i; + ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); + + for (i = 0; i < old->cnt; i++) + cmds->names[cmds->cnt++] = old->names[i]; + free(old->names); + old->cnt = 0; + old->names = NULL; +} + +const char *help_unknown_cmd(const char *cmd) +{ + int i, n, best_similarity = 0; + struct cmdnames main_cmds, other_cmds; + + memset(&main_cmds, 0, sizeof(main_cmds)); + memset(&other_cmds, 0, sizeof(main_cmds)); + memset(&aliases, 0, sizeof(aliases)); + + perf_config(perf_unknown_cmd_config, NULL); + + load_command_list("perf-", &main_cmds, &other_cmds); + + add_cmd_list(&main_cmds, &aliases); + add_cmd_list(&main_cmds, &other_cmds); + qsort(main_cmds.names, main_cmds.cnt, + sizeof(main_cmds.names), cmdname_compare); + uniq(&main_cmds); + + /* This reuses cmdname->len for similarity index */ + for (i = 0; i < main_cmds.cnt; ++i) + main_cmds.names[i]->len = + levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); + + qsort(main_cmds.names, main_cmds.cnt, + sizeof(*main_cmds.names), levenshtein_compare); + + if (!main_cmds.cnt) + die ("Uh oh. Your system reports no Git commands at all."); + + best_similarity = main_cmds.names[0]->len; + n = 1; + while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) + ++n; + if (autocorrect && n == 1) { + const char *assumed = main_cmds.names[0]->name; + main_cmds.names[0] = NULL; + clean_cmdnames(&main_cmds); + fprintf(stderr, "WARNING: You called a Git program named '%s', " + "which does not exist.\n" + "Continuing under the assumption that you meant '%s'\n", + cmd, assumed); + if (autocorrect > 0) { + fprintf(stderr, "in %0.1f seconds automatically...\n", + (float)autocorrect/10.0); + poll(NULL, 0, autocorrect * 100); + } + return assumed; + } + + fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); + + if (best_similarity < 6) { + fprintf(stderr, "\nDid you mean %s?\n", + n < 2 ? "this": "one of these"); + + for (i = 0; i < n; i++) + fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); + } + + exit(1); +} + +int cmd_version(int argc, const char **argv, const char *prefix) +{ + printf("perf version %s\n", perf_version_string); + return 0; +} diff --git a/Documentation/perf_counter/util/help.h b/Documentation/perf_counter/util/help.h new file mode 100644 index 0000000..56bc154 --- /dev/null +++ b/Documentation/perf_counter/util/help.h @@ -0,0 +1,29 @@ +#ifndef HELP_H +#define HELP_H + +struct cmdnames { + int alloc; + int cnt; + struct cmdname { + size_t len; /* also used for similarity index in help.c */ + char name[FLEX_ARRAY]; + } **names; +}; + +static inline void mput_char(char c, unsigned int num) +{ + while(num--) + putchar(c); +} + +void load_command_list(const char *prefix, + struct cmdnames *main_cmds, + struct cmdnames *other_cmds); +void add_cmdname(struct cmdnames *cmds, const char *name, int len); +/* Here we require that excludes is a sorted list. */ +void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); +int is_in_cmdlist(struct cmdnames *c, const char *s); +void list_commands(const char *title, struct cmdnames *main_cmds, + struct cmdnames *other_cmds); + +#endif /* HELP_H */ diff --git a/Documentation/perf_counter/util/levenshtein.c b/Documentation/perf_counter/util/levenshtein.c new file mode 100644 index 0000000..e521d15 --- /dev/null +++ b/Documentation/perf_counter/util/levenshtein.c @@ -0,0 +1,84 @@ +#include "cache.h" +#include "levenshtein.h" + +/* + * This function implements the Damerau-Levenshtein algorithm to + * calculate a distance between strings. + * + * Basically, it says how many letters need to be swapped, substituted, + * deleted from, or added to string1, at least, to get string2. + * + * The idea is to build a distance matrix for the substrings of both + * strings. To avoid a large space complexity, only the last three rows + * are kept in memory (if swaps had the same or higher cost as one deletion + * plus one insertion, only two rows would be needed). + * + * At any stage, "i + 1" denotes the length of the current substring of + * string1 that the distance is calculated for. + * + * row2 holds the current row, row1 the previous row (i.e. for the substring + * of string1 of length "i"), and row0 the row before that. + * + * In other words, at the start of the big loop, row2[j + 1] contains the + * Damerau-Levenshtein distance between the substring of string1 of length + * "i" and the substring of string2 of length "j + 1". + * + * All the big loop does is determine the partial minimum-cost paths. + * + * It does so by calculating the costs of the path ending in characters + * i (in string1) and j (in string2), respectively, given that the last + * operation is a substition, a swap, a deletion, or an insertion. + * + * This implementation allows the costs to be weighted: + * + * - w (as in "sWap") + * - s (as in "Substitution") + * - a (for insertion, AKA "Add") + * - d (as in "Deletion") + * + * Note that this algorithm calculates a distance _iff_ d == a. + */ +int levenshtein(const char *string1, const char *string2, + int w, int s, int a, int d) +{ + int len1 = strlen(string1), len2 = strlen(string2); + int *row0 = malloc(sizeof(int) * (len2 + 1)); + int *row1 = malloc(sizeof(int) * (len2 + 1)); + int *row2 = malloc(sizeof(int) * (len2 + 1)); + int i, j; + + for (j = 0; j <= len2; j++) + row1[j] = j * a; + for (i = 0; i < len1; i++) { + int *dummy; + + row2[0] = (i + 1) * d; + for (j = 0; j < len2; j++) { + /* substitution */ + row2[j + 1] = row1[j] + s * (string1[i] != string2[j]); + /* swap */ + if (i > 0 && j > 0 && string1[i - 1] == string2[j] && + string1[i] == string2[j - 1] && + row2[j + 1] > row0[j - 1] + w) + row2[j + 1] = row0[j - 1] + w; + /* deletion */ + if (row2[j + 1] > row1[j + 1] + d) + row2[j + 1] = row1[j + 1] + d; + /* insertion */ + if (row2[j + 1] > row2[j] + a) + row2[j + 1] = row2[j] + a; + } + + dummy = row0; + row0 = row1; + row1 = row2; + row2 = dummy; + } + + i = row1[len2]; + free(row0); + free(row1); + free(row2); + + return i; +} diff --git a/Documentation/perf_counter/util/levenshtein.h b/Documentation/perf_counter/util/levenshtein.h new file mode 100644 index 0000000..0173abe --- /dev/null +++ b/Documentation/perf_counter/util/levenshtein.h @@ -0,0 +1,8 @@ +#ifndef LEVENSHTEIN_H +#define LEVENSHTEIN_H + +int levenshtein(const char *string1, const char *string2, + int swap_penalty, int substition_penalty, + int insertion_penalty, int deletion_penalty); + +#endif diff --git a/Documentation/perf_counter/util/parse-options.c b/Documentation/perf_counter/util/parse-options.c new file mode 100644 index 0000000..28b34c1 --- /dev/null +++ b/Documentation/perf_counter/util/parse-options.c @@ -0,0 +1,492 @@ +#include "util.h" +#include "parse-options.h" +#include "cache.h" + +#define OPT_SHORT 1 +#define OPT_UNSET 2 + +static int opterror(const struct option *opt, const char *reason, int flags) +{ + if (flags & OPT_SHORT) + return error("switch `%c' %s", opt->short_name, reason); + if (flags & OPT_UNSET) + return error("option `no-%s' %s", opt->long_name, reason); + return error("option `%s' %s", opt->long_name, reason); +} + +static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, + int flags, const char **arg) +{ + if (p->opt) { + *arg = p->opt; + p->opt = NULL; + } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { + *arg = (const char *)opt->defval; + } else if (p->argc > 1) { + p->argc--; + *arg = *++p->argv; + } else + return opterror(opt, "requires a value", flags); + return 0; +} + +static int get_value(struct parse_opt_ctx_t *p, + const struct option *opt, int flags) +{ + const char *s, *arg; + const int unset = flags & OPT_UNSET; + + if (unset && p->opt) + return opterror(opt, "takes no value", flags); + if (unset && (opt->flags & PARSE_OPT_NONEG)) + return opterror(opt, "isn't available", flags); + + if (!(flags & OPT_SHORT) && p->opt) { + switch (opt->type) { + case OPTION_CALLBACK: + if (!(opt->flags & PARSE_OPT_NOARG)) + break; + /* FALLTHROUGH */ + case OPTION_BOOLEAN: + case OPTION_BIT: + case OPTION_SET_INT: + case OPTION_SET_PTR: + return opterror(opt, "takes no value", flags); + default: + break; + } + } + + switch (opt->type) { + case OPTION_BIT: + if (unset) + *(int *)opt->value &= ~opt->defval; + else + *(int *)opt->value |= opt->defval; + return 0; + + case OPTION_BOOLEAN: + *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; + return 0; + + case OPTION_SET_INT: + *(int *)opt->value = unset ? 0 : opt->defval; + return 0; + + case OPTION_SET_PTR: + *(void **)opt->value = unset ? NULL : (void *)opt->defval; + return 0; + + case OPTION_STRING: + if (unset) + *(const char **)opt->value = NULL; + else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) + *(const char **)opt->value = (const char *)opt->defval; + else + return get_arg(p, opt, flags, (const char **)opt->value); + return 0; + + case OPTION_CALLBACK: + if (unset) + return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; + if (opt->flags & PARSE_OPT_NOARG) + return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) + return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; + if (get_arg(p, opt, flags, &arg)) + return -1; + return (*opt->callback)(opt, arg, 0) ? (-1) : 0; + + case OPTION_INTEGER: + if (unset) { + *(int *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(int *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + *(int *)opt->value = strtol(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + + default: + die("should not happen, someone must be hit on the forehead"); + } +} + +static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) +{ + for (; options->type != OPTION_END; options++) { + if (options->short_name == *p->opt) { + p->opt = p->opt[1] ? p->opt + 1 : NULL; + return get_value(p, options, OPT_SHORT); + } + } + return -2; +} + +static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, + const struct option *options) +{ + const char *arg_end = strchr(arg, '='); + const struct option *abbrev_option = NULL, *ambiguous_option = NULL; + int abbrev_flags = 0, ambiguous_flags = 0; + + if (!arg_end) + arg_end = arg + strlen(arg); + + for (; options->type != OPTION_END; options++) { + const char *rest; + int flags = 0; + + if (!options->long_name) + continue; + + rest = skip_prefix(arg, options->long_name); + if (options->type == OPTION_ARGUMENT) { + if (!rest) + continue; + if (*rest == '=') + return opterror(options, "takes no value", flags); + if (*rest) + continue; + p->out[p->cpidx++] = arg - 2; + return 0; + } + if (!rest) { + /* abbreviated? */ + if (!strncmp(options->long_name, arg, arg_end - arg)) { +is_abbreviated: + if (abbrev_option) { + /* + * If this is abbreviated, it is + * ambiguous. So when there is no + * exact match later, we need to + * error out. + */ + ambiguous_option = abbrev_option; + ambiguous_flags = abbrev_flags; + } + if (!(flags & OPT_UNSET) && *arg_end) + p->opt = arg_end + 1; + abbrev_option = options; + abbrev_flags = flags; + continue; + } + /* negated and abbreviated very much? */ + if (!prefixcmp("no-", arg)) { + flags |= OPT_UNSET; + goto is_abbreviated; + } + /* negated? */ + if (strncmp(arg, "no-", 3)) + continue; + flags |= OPT_UNSET; + rest = skip_prefix(arg + 3, options->long_name); + /* abbreviated and negated? */ + if (!rest && !prefixcmp(options->long_name, arg + 3)) + goto is_abbreviated; + if (!rest) + continue; + } + if (*rest) { + if (*rest != '=') + continue; + p->opt = rest + 1; + } + return get_value(p, options, flags); + } + + if (ambiguous_option) + return error("Ambiguous option: %s " + "(could be --%s%s or --%s%s)", + arg, + (ambiguous_flags & OPT_UNSET) ? "no-" : "", + ambiguous_option->long_name, + (abbrev_flags & OPT_UNSET) ? "no-" : "", + abbrev_option->long_name); + if (abbrev_option) + return get_value(p, abbrev_option, abbrev_flags); + return -2; +} + +static void check_typos(const char *arg, const struct option *options) +{ + if (strlen(arg) < 3) + return; + + if (!prefixcmp(arg, "no-")) { + error ("did you mean `--%s` (with two dashes ?)", arg); + exit(129); + } + + for (; options->type != OPTION_END; options++) { + if (!options->long_name) + continue; + if (!prefixcmp(options->long_name, arg)) { + error ("did you mean `--%s` (with two dashes ?)", arg); + exit(129); + } + } +} + +void parse_options_start(struct parse_opt_ctx_t *ctx, + int argc, const char **argv, int flags) +{ + memset(ctx, 0, sizeof(*ctx)); + ctx->argc = argc - 1; + ctx->argv = argv + 1; + ctx->out = argv; + ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); + ctx->flags = flags; + if ((flags & PARSE_OPT_KEEP_UNKNOWN) && + (flags & PARSE_OPT_STOP_AT_NON_OPTION)) + die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); +} + +static int usage_with_options_internal(const char * const *, + const struct option *, int); + +int parse_options_step(struct parse_opt_ctx_t *ctx, + const struct option *options, + const char * const usagestr[]) +{ + int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); + + /* we must reset ->opt, unknown short option leave it dangling */ + ctx->opt = NULL; + + for (; ctx->argc; ctx->argc--, ctx->argv++) { + const char *arg = ctx->argv[0]; + + if (*arg != '-' || !arg[1]) { + if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) + break; + ctx->out[ctx->cpidx++] = ctx->argv[0]; + continue; + } + + if (arg[1] != '-') { + ctx->opt = arg + 1; + if (internal_help && *ctx->opt == 'h') + return parse_options_usage(usagestr, options); + switch (parse_short_opt(ctx, options)) { + case -1: + return parse_options_usage(usagestr, options); + case -2: + goto unknown; + } + if (ctx->opt) + check_typos(arg + 1, options); + while (ctx->opt) { + if (internal_help && *ctx->opt == 'h') + return parse_options_usage(usagestr, options); + switch (parse_short_opt(ctx, options)) { + case -1: + return parse_options_usage(usagestr, options); + case -2: + /* fake a short option thing to hide the fact that we may have + * started to parse aggregated stuff + * + * This is leaky, too bad. + */ + ctx->argv[0] = strdup(ctx->opt - 1); + *(char *)ctx->argv[0] = '-'; + goto unknown; + } + } + continue; + } + + if (!arg[2]) { /* "--" */ + if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { + ctx->argc--; + ctx->argv++; + } + break; + } + + if (internal_help && !strcmp(arg + 2, "help-all")) + return usage_with_options_internal(usagestr, options, 1); + if (internal_help && !strcmp(arg + 2, "help")) + return parse_options_usage(usagestr, options); + switch (parse_long_opt(ctx, arg + 2, options)) { + case -1: + return parse_options_usage(usagestr, options); + case -2: + goto unknown; + } + continue; +unknown: + if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) + return PARSE_OPT_UNKNOWN; + ctx->out[ctx->cpidx++] = ctx->argv[0]; + ctx->opt = NULL; + } + return PARSE_OPT_DONE; +} + +int parse_options_end(struct parse_opt_ctx_t *ctx) +{ + memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); + ctx->out[ctx->cpidx + ctx->argc] = NULL; + return ctx->cpidx + ctx->argc; +} + +int parse_options(int argc, const char **argv, const struct option *options, + const char * const usagestr[], int flags) +{ + struct parse_opt_ctx_t ctx; + + parse_options_start(&ctx, argc, argv, flags); + switch (parse_options_step(&ctx, options, usagestr)) { + case PARSE_OPT_HELP: + exit(129); + case PARSE_OPT_DONE: + break; + default: /* PARSE_OPT_UNKNOWN */ + if (ctx.argv[0][1] == '-') { + error("unknown option `%s'", ctx.argv[0] + 2); + } else { + error("unknown switch `%c'", *ctx.opt); + } + usage_with_options(usagestr, options); + } + + return parse_options_end(&ctx); +} + +#define USAGE_OPTS_WIDTH 24 +#define USAGE_GAP 2 + +int usage_with_options_internal(const char * const *usagestr, + const struct option *opts, int full) +{ + if (!usagestr) + return PARSE_OPT_HELP; + + fprintf(stderr, "usage: %s\n", *usagestr++); + while (*usagestr && **usagestr) + fprintf(stderr, " or: %s\n", *usagestr++); + while (*usagestr) { + fprintf(stderr, "%s%s\n", + **usagestr ? " " : "", + *usagestr); + usagestr++; + } + + if (opts->type != OPTION_GROUP) + fputc('\n', stderr); + + for (; opts->type != OPTION_END; opts++) { + size_t pos; + int pad; + + if (opts->type == OPTION_GROUP) { + fputc('\n', stderr); + if (*opts->help) + fprintf(stderr, "%s\n", opts->help); + continue; + } + if (!full && (opts->flags & PARSE_OPT_HIDDEN)) + continue; + + pos = fprintf(stderr, " "); + if (opts->short_name) + pos += fprintf(stderr, "-%c", opts->short_name); + if (opts->long_name && opts->short_name) + pos += fprintf(stderr, ", "); + if (opts->long_name) + pos += fprintf(stderr, "--%s", opts->long_name); + + switch (opts->type) { + case OPTION_ARGUMENT: + break; + case OPTION_INTEGER: + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=]"); + else + pos += fprintf(stderr, "[]"); + else + pos += fprintf(stderr, " "); + break; + case OPTION_CALLBACK: + if (opts->flags & PARSE_OPT_NOARG) + break; + /* FALLTHROUGH */ + case OPTION_STRING: + if (opts->argh) { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=<%s>]", opts->argh); + else + pos += fprintf(stderr, "[<%s>]", opts->argh); + else + pos += fprintf(stderr, " <%s>", opts->argh); + } else { + if (opts->flags & PARSE_OPT_OPTARG) + if (opts->long_name) + pos += fprintf(stderr, "[=...]"); + else + pos += fprintf(stderr, "[...]"); + else + pos += fprintf(stderr, " ..."); + } + break; + default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ + break; + } + + if (pos <= USAGE_OPTS_WIDTH) + pad = USAGE_OPTS_WIDTH - pos; + else { + fputc('\n', stderr); + pad = USAGE_OPTS_WIDTH; + } + fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); + } + fputc('\n', stderr); + + return PARSE_OPT_HELP; +} + +void usage_with_options(const char * const *usagestr, + const struct option *opts) +{ + usage_with_options_internal(usagestr, opts, 0); + exit(129); +} + +int parse_options_usage(const char * const *usagestr, + const struct option *opts) +{ + return usage_with_options_internal(usagestr, opts, 0); +} + + +int parse_opt_verbosity_cb(const struct option *opt, const char *arg, + int unset) +{ + int *target = opt->value; + + if (unset) + /* --no-quiet, --no-verbose */ + *target = 0; + else if (opt->short_name == 'v') { + if (*target >= 0) + (*target)++; + else + *target = 1; + } else { + if (*target <= 0) + (*target)--; + else + *target = -1; + } + return 0; +} diff --git a/Documentation/perf_counter/util/parse-options.h b/Documentation/perf_counter/util/parse-options.h new file mode 100644 index 0000000..a81c7fa --- /dev/null +++ b/Documentation/perf_counter/util/parse-options.h @@ -0,0 +1,172 @@ +#ifndef PARSE_OPTIONS_H +#define PARSE_OPTIONS_H + +enum parse_opt_type { + /* special types */ + OPTION_END, + OPTION_ARGUMENT, + OPTION_GROUP, + /* options with no arguments */ + OPTION_BIT, + OPTION_BOOLEAN, /* _INCR would have been a better name */ + OPTION_SET_INT, + OPTION_SET_PTR, + /* options with arguments (usually) */ + OPTION_STRING, + OPTION_INTEGER, + OPTION_CALLBACK, +}; + +enum parse_opt_flags { + PARSE_OPT_KEEP_DASHDASH = 1, + PARSE_OPT_STOP_AT_NON_OPTION = 2, + PARSE_OPT_KEEP_ARGV0 = 4, + PARSE_OPT_KEEP_UNKNOWN = 8, + PARSE_OPT_NO_INTERNAL_HELP = 16, +}; + +enum parse_opt_option_flags { + PARSE_OPT_OPTARG = 1, + PARSE_OPT_NOARG = 2, + PARSE_OPT_NONEG = 4, + PARSE_OPT_HIDDEN = 8, + PARSE_OPT_LASTARG_DEFAULT = 16, +}; + +struct option; +typedef int parse_opt_cb(const struct option *, const char *arg, int unset); + +/* + * `type`:: + * holds the type of the option, you must have an OPTION_END last in your + * array. + * + * `short_name`:: + * the character to use as a short option name, '\0' if none. + * + * `long_name`:: + * the long option name, without the leading dashes, NULL if none. + * + * `value`:: + * stores pointers to the values to be filled. + * + * `argh`:: + * token to explain the kind of argument this option wants. Keep it + * homogenous across the repository. + * + * `help`:: + * the short help associated to what the option does. + * Must never be NULL (except for OPTION_END). + * OPTION_GROUP uses this pointer to store the group header. + * + * `flags`:: + * mask of parse_opt_option_flags. + * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) + * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs + * PARSE_OPT_NONEG: says that this option cannot be negated + * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in + * the long one. + * + * `callback`:: + * pointer to the callback to use for OPTION_CALLBACK. + * + * `defval`:: + * default value to fill (*->value) with for PARSE_OPT_OPTARG. + * OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in + * the value when met. + * CALLBACKS can use it like they want. + */ +struct option { + enum parse_opt_type type; + int short_name; + const char *long_name; + void *value; + const char *argh; + const char *help; + + int flags; + parse_opt_cb *callback; + intptr_t defval; +}; + +#define OPT_END() { OPTION_END } +#define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } +#define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } +#define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } +#define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } +#define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } +#define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } +#define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } +#define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } +#define OPT_DATE(s, l, v, h) \ + { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ + parse_opt_approxidate_cb } +#define OPT_CALLBACK(s, l, v, a, h, f) \ + { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } + +/* parse_options() will filter out the processed options and leave the + * non-option argments in argv[]. + * Returns the number of arguments left in argv[]. + */ +extern int parse_options(int argc, const char **argv, + const struct option *options, + const char * const usagestr[], int flags); + +extern NORETURN void usage_with_options(const char * const *usagestr, + const struct option *options); + +/*----- incremantal advanced APIs -----*/ + +enum { + PARSE_OPT_HELP = -1, + PARSE_OPT_DONE, + PARSE_OPT_UNKNOWN, +}; + +/* + * It's okay for the caller to consume argv/argc in the usual way. + * Other fields of that structure are private to parse-options and should not + * be modified in any way. + */ +struct parse_opt_ctx_t { + const char **argv; + const char **out; + int argc, cpidx; + const char *opt; + int flags; +}; + +extern int parse_options_usage(const char * const *usagestr, + const struct option *opts); + +extern void parse_options_start(struct parse_opt_ctx_t *ctx, + int argc, const char **argv, int flags); + +extern int parse_options_step(struct parse_opt_ctx_t *ctx, + const struct option *options, + const char * const usagestr[]); + +extern int parse_options_end(struct parse_opt_ctx_t *ctx); + + +/*----- some often used options -----*/ +extern int parse_opt_abbrev_cb(const struct option *, const char *, int); +extern int parse_opt_approxidate_cb(const struct option *, const char *, int); +extern int parse_opt_verbosity_cb(const struct option *, const char *, int); + +#define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") +#define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") +#define OPT__VERBOSITY(var) \ + { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ + PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ + { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ + PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } +#define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") +#define OPT__ABBREV(var) \ + { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ + "use digits to display SHA-1s", \ + PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } + +extern const char *parse_options_fix_filename(const char *prefix, const char *file); + +#endif diff --git a/Documentation/perf_counter/util/path.c b/Documentation/perf_counter/util/path.c new file mode 100644 index 0000000..a501a40 --- /dev/null +++ b/Documentation/perf_counter/util/path.c @@ -0,0 +1,353 @@ +/* + * I'm tired of doing "vsnprintf()" etc just to open a + * file, so here's a "return static buffer with printf" + * interface for paths. + * + * It's obviously not thread-safe. Sue me. But it's quite + * useful for doing things like + * + * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); + * + * which is what it's designed for. + */ +#include "cache.h" + +static char bad_path[] = "/bad-path/"; +/* + * Two hacks: + */ + +static char *get_perf_dir(void) +{ + return "."; +} + +size_t strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + + +static char *get_pathname(void) +{ + static char pathname_array[4][PATH_MAX]; + static int index; + return pathname_array[3 & ++index]; +} + +static char *cleanup_path(char *path) +{ + /* Clean it up */ + if (!memcmp(path, "./", 2)) { + path += 2; + while (*path == '/') + path++; + } + return path; +} + +char *mksnpath(char *buf, size_t n, const char *fmt, ...) +{ + va_list args; + unsigned len; + + va_start(args, fmt); + len = vsnprintf(buf, n, fmt, args); + va_end(args); + if (len >= n) { + strlcpy(buf, bad_path, n); + return buf; + } + return cleanup_path(buf); +} + +static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) +{ + const char *perf_dir = get_perf_dir(); + size_t len; + + len = strlen(perf_dir); + if (n < len + 1) + goto bad; + memcpy(buf, perf_dir, len); + if (len && !is_dir_sep(perf_dir[len-1])) + buf[len++] = '/'; + len += vsnprintf(buf + len, n - len, fmt, args); + if (len >= n) + goto bad; + return cleanup_path(buf); +bad: + strlcpy(buf, bad_path, n); + return buf; +} + +char *perf_snpath(char *buf, size_t n, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + (void)perf_vsnpath(buf, n, fmt, args); + va_end(args); + return buf; +} + +char *perf_pathdup(const char *fmt, ...) +{ + char path[PATH_MAX]; + va_list args; + va_start(args, fmt); + (void)perf_vsnpath(path, sizeof(path), fmt, args); + va_end(args); + return xstrdup(path); +} + +char *mkpath(const char *fmt, ...) +{ + va_list args; + unsigned len; + char *pathname = get_pathname(); + + va_start(args, fmt); + len = vsnprintf(pathname, PATH_MAX, fmt, args); + va_end(args); + if (len >= PATH_MAX) + return bad_path; + return cleanup_path(pathname); +} + +char *perf_path(const char *fmt, ...) +{ + const char *perf_dir = get_perf_dir(); + char *pathname = get_pathname(); + va_list args; + unsigned len; + + len = strlen(perf_dir); + if (len > PATH_MAX-100) + return bad_path; + memcpy(pathname, perf_dir, len); + if (len && perf_dir[len-1] != '/') + pathname[len++] = '/'; + va_start(args, fmt); + len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); + va_end(args); + if (len >= PATH_MAX) + return bad_path; + return cleanup_path(pathname); +} + + +/* perf_mkstemp() - create tmp file honoring TMPDIR variable */ +int perf_mkstemp(char *path, size_t len, const char *template) +{ + const char *tmp; + size_t n; + + tmp = getenv("TMPDIR"); + if (!tmp) + tmp = "/tmp"; + n = snprintf(path, len, "%s/%s", tmp, template); + if (len <= n) { + errno = ENAMETOOLONG; + return -1; + } + return mkstemp(path); +} + + +const char *make_relative_path(const char *abs, const char *base) +{ + static char buf[PATH_MAX + 1]; + int baselen; + if (!base) + return abs; + baselen = strlen(base); + if (prefixcmp(abs, base)) + return abs; + if (abs[baselen] == '/') + baselen++; + else if (base[baselen - 1] != '/') + return abs; + strcpy(buf, abs + baselen); + return buf; +} + +/* + * It is okay if dst == src, but they should not overlap otherwise. + * + * Performs the following normalizations on src, storing the result in dst: + * - Ensures that components are separated by '/' (Windows only) + * - Squashes sequences of '/'. + * - Removes "." components. + * - Removes ".." components, and the components the precede them. + * Returns failure (non-zero) if a ".." component appears as first path + * component anytime during the normalization. Otherwise, returns success (0). + * + * Note that this function is purely textual. It does not follow symlinks, + * verify the existence of the path, or make any system calls. + */ +int normalize_path_copy(char *dst, const char *src) +{ + char *dst0; + + if (has_dos_drive_prefix(src)) { + *dst++ = *src++; + *dst++ = *src++; + } + dst0 = dst; + + if (is_dir_sep(*src)) { + *dst++ = '/'; + while (is_dir_sep(*src)) + src++; + } + + for (;;) { + char c = *src; + + /* + * A path component that begins with . could be + * special: + * (1) "." and ends -- ignore and terminate. + * (2) "./" -- ignore them, eat slash and continue. + * (3) ".." and ends -- strip one and terminate. + * (4) "../" -- strip one, eat slash and continue. + */ + if (c == '.') { + if (!src[1]) { + /* (1) */ + src++; + } else if (is_dir_sep(src[1])) { + /* (2) */ + src += 2; + while (is_dir_sep(*src)) + src++; + continue; + } else if (src[1] == '.') { + if (!src[2]) { + /* (3) */ + src += 2; + goto up_one; + } else if (is_dir_sep(src[2])) { + /* (4) */ + src += 3; + while (is_dir_sep(*src)) + src++; + goto up_one; + } + } + } + + /* copy up to the next '/', and eat all '/' */ + while ((c = *src++) != '\0' && !is_dir_sep(c)) + *dst++ = c; + if (is_dir_sep(c)) { + *dst++ = '/'; + while (is_dir_sep(c)) + c = *src++; + src--; + } else if (!c) + break; + continue; + + up_one: + /* + * dst0..dst is prefix portion, and dst[-1] is '/'; + * go up one level. + */ + dst--; /* go to trailing '/' */ + if (dst <= dst0) + return -1; + /* Windows: dst[-1] cannot be backslash anymore */ + while (dst0 < dst && dst[-1] != '/') + dst--; + } + *dst = '\0'; + return 0; +} + +/* + * path = Canonical absolute path + * prefix_list = Colon-separated list of absolute paths + * + * Determines, for each path in prefix_list, whether the "prefix" really + * is an ancestor directory of path. Returns the length of the longest + * ancestor directory, excluding any trailing slashes, or -1 if no prefix + * is an ancestor. (Note that this means 0 is returned if prefix_list is + * "/".) "/foo" is not considered an ancestor of "/foobar". Directories + * are not considered to be their own ancestors. path must be in a + * canonical form: empty components, or "." or ".." components are not + * allowed. prefix_list may be null, which is like "". + */ +int longest_ancestor_length(const char *path, const char *prefix_list) +{ + char buf[PATH_MAX+1]; + const char *ceil, *colon; + int len, max_len = -1; + + if (prefix_list == NULL || !strcmp(path, "/")) + return -1; + + for (colon = ceil = prefix_list; *colon; ceil = colon+1) { + for (colon = ceil; *colon && *colon != PATH_SEP; colon++); + len = colon - ceil; + if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil)) + continue; + strlcpy(buf, ceil, len+1); + if (normalize_path_copy(buf, buf) < 0) + continue; + len = strlen(buf); + if (len > 0 && buf[len-1] == '/') + buf[--len] = '\0'; + + if (!strncmp(path, buf, len) && + path[len] == '/' && + len > max_len) { + max_len = len; + } + } + + return max_len; +} + +/* strip arbitrary amount of directory separators at end of path */ +static inline int chomp_trailing_dir_sep(const char *path, int len) +{ + while (len && is_dir_sep(path[len - 1])) + len--; + return len; +} + +/* + * If path ends with suffix (complete path components), returns the + * part before suffix (sans trailing directory separators). + * Otherwise returns NULL. + */ +char *strip_path_suffix(const char *path, const char *suffix) +{ + int path_len = strlen(path), suffix_len = strlen(suffix); + + while (suffix_len) { + if (!path_len) + return NULL; + + if (is_dir_sep(path[path_len - 1])) { + if (!is_dir_sep(suffix[suffix_len - 1])) + return NULL; + path_len = chomp_trailing_dir_sep(path, path_len); + suffix_len = chomp_trailing_dir_sep(suffix, suffix_len); + } + else if (path[--path_len] != suffix[--suffix_len]) + return NULL; + } + + if (path_len && !is_dir_sep(path[path_len - 1])) + return NULL; + return xstrndup(path, chomp_trailing_dir_sep(path, path_len)); +} diff --git a/Documentation/perf_counter/util/quote.c b/Documentation/perf_counter/util/quote.c new file mode 100644 index 0000000..7a49fcf --- /dev/null +++ b/Documentation/perf_counter/util/quote.c @@ -0,0 +1,478 @@ +#include "cache.h" +#include "quote.h" + +int quote_path_fully = 1; + +/* Help to copy the thing properly quoted for the shell safety. + * any single quote is replaced with '\'', any exclamation point + * is replaced with '\!', and the whole thing is enclosed in a + * + * E.g. + * original sq_quote result + * name ==> name ==> 'name' + * a b ==> a b ==> 'a b' + * a'b ==> a'\''b ==> 'a'\''b' + * a!b ==> a'\!'b ==> 'a'\!'b' + */ +static inline int need_bs_quote(char c) +{ + return (c == '\'' || c == '!'); +} + +void sq_quote_buf(struct strbuf *dst, const char *src) +{ + char *to_free = NULL; + + if (dst->buf == src) + to_free = strbuf_detach(dst, NULL); + + strbuf_addch(dst, '\''); + while (*src) { + size_t len = strcspn(src, "'!"); + strbuf_add(dst, src, len); + src += len; + while (need_bs_quote(*src)) { + strbuf_addstr(dst, "'\\"); + strbuf_addch(dst, *src++); + strbuf_addch(dst, '\''); + } + } + strbuf_addch(dst, '\''); + free(to_free); +} + +void sq_quote_print(FILE *stream, const char *src) +{ + char c; + + fputc('\'', stream); + while ((c = *src++)) { + if (need_bs_quote(c)) { + fputs("'\\", stream); + fputc(c, stream); + fputc('\'', stream); + } else { + fputc(c, stream); + } + } + fputc('\'', stream); +} + +void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) +{ + int i; + + /* Copy into destination buffer. */ + strbuf_grow(dst, 255); + for (i = 0; argv[i]; ++i) { + strbuf_addch(dst, ' '); + sq_quote_buf(dst, argv[i]); + if (maxlen && dst->len > maxlen) + die("Too many or long arguments"); + } +} + +char *sq_dequote_step(char *arg, char **next) +{ + char *dst = arg; + char *src = arg; + char c; + + if (*src != '\'') + return NULL; + for (;;) { + c = *++src; + if (!c) + return NULL; + if (c != '\'') { + *dst++ = c; + continue; + } + /* We stepped out of sq */ + switch (*++src) { + case '\0': + *dst = 0; + if (next) + *next = NULL; + return arg; + case '\\': + c = *++src; + if (need_bs_quote(c) && *++src == '\'') { + *dst++ = c; + continue; + } + /* Fallthrough */ + default: + if (!next || !isspace(*src)) + return NULL; + do { + c = *++src; + } while (isspace(c)); + *dst = 0; + *next = src; + return arg; + } + } +} + +char *sq_dequote(char *arg) +{ + return sq_dequote_step(arg, NULL); +} + +int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) +{ + char *next = arg; + + if (!*arg) + return 0; + do { + char *dequoted = sq_dequote_step(next, &next); + if (!dequoted) + return -1; + ALLOC_GROW(*argv, *nr + 1, *alloc); + (*argv)[(*nr)++] = dequoted; + } while (next); + + return 0; +} + +/* 1 means: quote as octal + * 0 means: quote as octal if (quote_path_fully) + * -1 means: never quote + * c: quote as "\\c" + */ +#define X8(x) x, x, x, x, x, x, x, x +#define X16(x) X8(x), X8(x) +static signed char const sq_lookup[256] = { + /* 0 1 2 3 4 5 6 7 */ + /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 'a', + /* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r', 1, 1, + /* 0x10 */ X16(1), + /* 0x20 */ -1, -1, '"', -1, -1, -1, -1, -1, + /* 0x28 */ X16(-1), X16(-1), X16(-1), + /* 0x58 */ -1, -1, -1, -1,'\\', -1, -1, -1, + /* 0x60 */ X16(-1), X8(-1), + /* 0x78 */ -1, -1, -1, -1, -1, -1, -1, 1, + /* 0x80 */ /* set to 0 */ +}; + +static inline int sq_must_quote(char c) +{ + return sq_lookup[(unsigned char)c] + quote_path_fully > 0; +} + +/* returns the longest prefix not needing a quote up to maxlen if positive. + This stops at the first \0 because it's marked as a character needing an + escape */ +static size_t next_quote_pos(const char *s, ssize_t maxlen) +{ + size_t len; + if (maxlen < 0) { + for (len = 0; !sq_must_quote(s[len]); len++); + } else { + for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++); + } + return len; +} + +/* + * C-style name quoting. + * + * (1) if sb and fp are both NULL, inspect the input name and counts the + * number of bytes that are needed to hold c_style quoted version of name, + * counting the double quotes around it but not terminating NUL, and + * returns it. + * However, if name does not need c_style quoting, it returns 0. + * + * (2) if sb or fp are not NULL, it emits the c_style quoted version + * of name, enclosed with double quotes if asked and needed only. + * Return value is the same as in (1). + */ +static size_t quote_c_style_counted(const char *name, ssize_t maxlen, + struct strbuf *sb, FILE *fp, int no_dq) +{ +#undef EMIT +#define EMIT(c) \ + do { \ + if (sb) strbuf_addch(sb, (c)); \ + if (fp) fputc((c), fp); \ + count++; \ + } while (0) +#define EMITBUF(s, l) \ + do { \ + if (sb) strbuf_add(sb, (s), (l)); \ + if (fp) fwrite((s), (l), 1, fp); \ + count += (l); \ + } while (0) + + size_t len, count = 0; + const char *p = name; + + for (;;) { + int ch; + + len = next_quote_pos(p, maxlen); + if (len == maxlen || !p[len]) + break; + + if (!no_dq && p == name) + EMIT('"'); + + EMITBUF(p, len); + EMIT('\\'); + p += len; + ch = (unsigned char)*p++; + if (sq_lookup[ch] >= ' ') { + EMIT(sq_lookup[ch]); + } else { + EMIT(((ch >> 6) & 03) + '0'); + EMIT(((ch >> 3) & 07) + '0'); + EMIT(((ch >> 0) & 07) + '0'); + } + } + + EMITBUF(p, len); + if (p == name) /* no ending quote needed */ + return 0; + + if (!no_dq) + EMIT('"'); + return count; +} + +size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq) +{ + return quote_c_style_counted(name, -1, sb, fp, nodq); +} + +void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq) +{ + if (quote_c_style(prefix, NULL, NULL, 0) || + quote_c_style(path, NULL, NULL, 0)) { + if (!nodq) + strbuf_addch(sb, '"'); + quote_c_style(prefix, sb, NULL, 1); + quote_c_style(path, sb, NULL, 1); + if (!nodq) + strbuf_addch(sb, '"'); + } else { + strbuf_addstr(sb, prefix); + strbuf_addstr(sb, path); + } +} + +void write_name_quoted(const char *name, FILE *fp, int terminator) +{ + if (terminator) { + quote_c_style(name, NULL, fp, 0); + } else { + fputs(name, fp); + } + fputc(terminator, fp); +} + +extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, + const char *name, FILE *fp, int terminator) +{ + int needquote = 0; + + if (terminator) { + needquote = next_quote_pos(pfx, pfxlen) < pfxlen + || name[next_quote_pos(name, -1)]; + } + if (needquote) { + fputc('"', fp); + quote_c_style_counted(pfx, pfxlen, NULL, fp, 1); + quote_c_style(name, NULL, fp, 1); + fputc('"', fp); + } else { + fwrite(pfx, pfxlen, 1, fp); + fputs(name, fp); + } + fputc(terminator, fp); +} + +/* quote path as relative to the given prefix */ +char *quote_path_relative(const char *in, int len, + struct strbuf *out, const char *prefix) +{ + int needquote; + + if (len < 0) + len = strlen(in); + + /* "../" prefix itself does not need quoting, but "in" might. */ + needquote = next_quote_pos(in, len) < len; + strbuf_setlen(out, 0); + strbuf_grow(out, len); + + if (needquote) + strbuf_addch(out, '"'); + if (prefix) { + int off = 0; + while (prefix[off] && off < len && prefix[off] == in[off]) + if (prefix[off] == '/') { + prefix += off + 1; + in += off + 1; + len -= off + 1; + off = 0; + } else + off++; + + for (; *prefix; prefix++) + if (*prefix == '/') + strbuf_addstr(out, "../"); + } + + quote_c_style_counted (in, len, out, NULL, 1); + + if (needquote) + strbuf_addch(out, '"'); + if (!out->len) + strbuf_addstr(out, "./"); + + return out->buf; +} + +/* + * C-style name unquoting. + * + * Quoted should point at the opening double quote. + * + Returns 0 if it was able to unquote the string properly, and appends the + * result in the strbuf `sb'. + * + Returns -1 in case of error, and doesn't touch the strbuf. Though note + * that this function will allocate memory in the strbuf, so calling + * strbuf_release is mandatory whichever result unquote_c_style returns. + * + * Updates endp pointer to point at one past the ending double quote if given. + */ +int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) +{ + size_t oldlen = sb->len, len; + int ch, ac; + + if (*quoted++ != '"') + return -1; + + for (;;) { + len = strcspn(quoted, "\"\\"); + strbuf_add(sb, quoted, len); + quoted += len; + + switch (*quoted++) { + case '"': + if (endp) + *endp = quoted; + return 0; + case '\\': + break; + default: + goto error; + } + + switch ((ch = *quoted++)) { + case 'a': ch = '\a'; break; + case 'b': ch = '\b'; break; + case 'f': ch = '\f'; break; + case 'n': ch = '\n'; break; + case 'r': ch = '\r'; break; + case 't': ch = '\t'; break; + case 'v': ch = '\v'; break; + + case '\\': case '"': + break; /* verbatim */ + + /* octal values with first digit over 4 overflow */ + case '0': case '1': case '2': case '3': + ac = ((ch - '0') << 6); + if ((ch = *quoted++) < '0' || '7' < ch) + goto error; + ac |= ((ch - '0') << 3); + if ((ch = *quoted++) < '0' || '7' < ch) + goto error; + ac |= (ch - '0'); + ch = ac; + break; + default: + goto error; + } + strbuf_addch(sb, ch); + } + + error: + strbuf_setlen(sb, oldlen); + return -1; +} + +/* quoting as a string literal for other languages */ + +void perl_quote_print(FILE *stream, const char *src) +{ + const char sq = '\''; + const char bq = '\\'; + char c; + + fputc(sq, stream); + while ((c = *src++)) { + if (c == sq || c == bq) + fputc(bq, stream); + fputc(c, stream); + } + fputc(sq, stream); +} + +void python_quote_print(FILE *stream, const char *src) +{ + const char sq = '\''; + const char bq = '\\'; + const char nl = '\n'; + char c; + + fputc(sq, stream); + while ((c = *src++)) { + if (c == nl) { + fputc(bq, stream); + fputc('n', stream); + continue; + } + if (c == sq || c == bq) + fputc(bq, stream); + fputc(c, stream); + } + fputc(sq, stream); +} + +void tcl_quote_print(FILE *stream, const char *src) +{ + char c; + + fputc('"', stream); + while ((c = *src++)) { + switch (c) { + case '[': case ']': + case '{': case '}': + case '$': case '\\': case '"': + fputc('\\', stream); + default: + fputc(c, stream); + break; + case '\f': + fputs("\\f", stream); + break; + case '\r': + fputs("\\r", stream); + break; + case '\n': + fputs("\\n", stream); + break; + case '\t': + fputs("\\t", stream); + break; + case '\v': + fputs("\\v", stream); + break; + } + } + fputc('"', stream); +} diff --git a/Documentation/perf_counter/util/quote.h b/Documentation/perf_counter/util/quote.h new file mode 100644 index 0000000..5dfad89 --- /dev/null +++ b/Documentation/perf_counter/util/quote.h @@ -0,0 +1,68 @@ +#ifndef QUOTE_H +#define QUOTE_H + +#include +#include + +/* Help to copy the thing properly quoted for the shell safety. + * any single quote is replaced with '\'', any exclamation point + * is replaced with '\!', and the whole thing is enclosed in a + * single quote pair. + * + * For example, if you are passing the result to system() as an + * argument: + * + * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) + * + * would be appropriate. If the system() is going to call ssh to + * run the command on the other side: + * + * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); + * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd)); + * + * Note that the above examples leak memory! Remember to free result from + * sq_quote() in a real application. + * + * sq_quote_buf() writes to an existing buffer of specified size; it + * will return the number of characters that would have been written + * excluding the final null regardless of the buffer size. + */ + +extern void sq_quote_print(FILE *stream, const char *src); + +extern void sq_quote_buf(struct strbuf *, const char *src); +extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); + +/* This unwraps what sq_quote() produces in place, but returns + * NULL if the input does not look like what sq_quote would have + * produced. + */ +extern char *sq_dequote(char *); + +/* + * Same as the above, but can be used to unwrap many arguments in the + * same string separated by space. "next" is changed to point to the + * next argument that should be passed as first parameter. When there + * is no more argument to be dequoted, "next" is updated to point to NULL. + */ +extern char *sq_dequote_step(char *arg, char **next); +extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); + +extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); +extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq); +extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); + +extern void write_name_quoted(const char *name, FILE *, int terminator); +extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, + const char *name, FILE *, int terminator); + +/* quote path as relative to the given prefix */ +char *quote_path_relative(const char *in, int len, + struct strbuf *out, const char *prefix); + +/* quoting as a string literal for other languages */ +extern void perl_quote_print(FILE *stream, const char *src); +extern void python_quote_print(FILE *stream, const char *src); +extern void tcl_quote_print(FILE *stream, const char *src); + +#endif diff --git a/Documentation/perf_counter/util/run-command.c b/Documentation/perf_counter/util/run-command.c new file mode 100644 index 0000000..b2f5e85 --- /dev/null +++ b/Documentation/perf_counter/util/run-command.c @@ -0,0 +1,395 @@ +#include "cache.h" +#include "run-command.h" +#include "exec_cmd.h" + +static inline void close_pair(int fd[2]) +{ + close(fd[0]); + close(fd[1]); +} + +static inline void dup_devnull(int to) +{ + int fd = open("/dev/null", O_RDWR); + dup2(fd, to); + close(fd); +} + +int start_command(struct child_process *cmd) +{ + int need_in, need_out, need_err; + int fdin[2], fdout[2], fderr[2]; + + /* + * In case of errors we must keep the promise to close FDs + * that have been passed in via ->in and ->out. + */ + + need_in = !cmd->no_stdin && cmd->in < 0; + if (need_in) { + if (pipe(fdin) < 0) { + if (cmd->out > 0) + close(cmd->out); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->in = fdin[1]; + } + + need_out = !cmd->no_stdout + && !cmd->stdout_to_stderr + && cmd->out < 0; + if (need_out) { + if (pipe(fdout) < 0) { + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->out = fdout[0]; + } + + need_err = !cmd->no_stderr && cmd->err < 0; + if (need_err) { + if (pipe(fderr) < 0) { + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + if (need_out) + close_pair(fdout); + else if (cmd->out) + close(cmd->out); + return -ERR_RUN_COMMAND_PIPE; + } + cmd->err = fderr[0]; + } + +#ifndef __MINGW32__ + fflush(NULL); + cmd->pid = fork(); + if (!cmd->pid) { + if (cmd->no_stdin) + dup_devnull(0); + else if (need_in) { + dup2(fdin[0], 0); + close_pair(fdin); + } else if (cmd->in) { + dup2(cmd->in, 0); + close(cmd->in); + } + + if (cmd->no_stderr) + dup_devnull(2); + else if (need_err) { + dup2(fderr[1], 2); + close_pair(fderr); + } + + if (cmd->no_stdout) + dup_devnull(1); + else if (cmd->stdout_to_stderr) + dup2(2, 1); + else if (need_out) { + dup2(fdout[1], 1); + close_pair(fdout); + } else if (cmd->out > 1) { + dup2(cmd->out, 1); + close(cmd->out); + } + + if (cmd->dir && chdir(cmd->dir)) + die("exec %s: cd to %s failed (%s)", cmd->argv[0], + cmd->dir, strerror(errno)); + if (cmd->env) { + for (; *cmd->env; cmd->env++) { + if (strchr(*cmd->env, '=')) + putenv((char*)*cmd->env); + else + unsetenv(*cmd->env); + } + } + if (cmd->preexec_cb) + cmd->preexec_cb(); + if (cmd->perf_cmd) { + execv_perf_cmd(cmd->argv); + } else { + execvp(cmd->argv[0], (char *const*) cmd->argv); + } + exit(127); + } +#else + int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ + const char **sargv = cmd->argv; + char **env = environ; + + if (cmd->no_stdin) { + s0 = dup(0); + dup_devnull(0); + } else if (need_in) { + s0 = dup(0); + dup2(fdin[0], 0); + } else if (cmd->in) { + s0 = dup(0); + dup2(cmd->in, 0); + } + + if (cmd->no_stderr) { + s2 = dup(2); + dup_devnull(2); + } else if (need_err) { + s2 = dup(2); + dup2(fderr[1], 2); + } + + if (cmd->no_stdout) { + s1 = dup(1); + dup_devnull(1); + } else if (cmd->stdout_to_stderr) { + s1 = dup(1); + dup2(2, 1); + } else if (need_out) { + s1 = dup(1); + dup2(fdout[1], 1); + } else if (cmd->out > 1) { + s1 = dup(1); + dup2(cmd->out, 1); + } + + if (cmd->dir) + die("chdir in start_command() not implemented"); + if (cmd->env) { + env = copy_environ(); + for (; *cmd->env; cmd->env++) + env = env_setenv(env, *cmd->env); + } + + if (cmd->perf_cmd) { + cmd->argv = prepare_perf_cmd(cmd->argv); + } + + cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); + + if (cmd->env) + free_environ(env); + if (cmd->perf_cmd) + free(cmd->argv); + + cmd->argv = sargv; + if (s0 >= 0) + dup2(s0, 0), close(s0); + if (s1 >= 0) + dup2(s1, 1), close(s1); + if (s2 >= 0) + dup2(s2, 2), close(s2); +#endif + + if (cmd->pid < 0) { + int err = errno; + if (need_in) + close_pair(fdin); + else if (cmd->in) + close(cmd->in); + if (need_out) + close_pair(fdout); + else if (cmd->out) + close(cmd->out); + if (need_err) + close_pair(fderr); + return err == ENOENT ? + -ERR_RUN_COMMAND_EXEC : + -ERR_RUN_COMMAND_FORK; + } + + if (need_in) + close(fdin[0]); + else if (cmd->in) + close(cmd->in); + + if (need_out) + close(fdout[1]); + else if (cmd->out) + close(cmd->out); + + if (need_err) + close(fderr[1]); + + return 0; +} + +static int wait_or_whine(pid_t pid) +{ + for (;;) { + int status, code; + pid_t waiting = waitpid(pid, &status, 0); + + if (waiting < 0) { + if (errno == EINTR) + continue; + error("waitpid failed (%s)", strerror(errno)); + return -ERR_RUN_COMMAND_WAITPID; + } + if (waiting != pid) + return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; + if (WIFSIGNALED(status)) + return -ERR_RUN_COMMAND_WAITPID_SIGNAL; + + if (!WIFEXITED(status)) + return -ERR_RUN_COMMAND_WAITPID_NOEXIT; + code = WEXITSTATUS(status); + switch (code) { + case 127: + return -ERR_RUN_COMMAND_EXEC; + case 0: + return 0; + default: + return -code; + } + } +} + +int finish_command(struct child_process *cmd) +{ + return wait_or_whine(cmd->pid); +} + +int run_command(struct child_process *cmd) +{ + int code = start_command(cmd); + if (code) + return code; + return finish_command(cmd); +} + +static void prepare_run_command_v_opt(struct child_process *cmd, + const char **argv, + int opt) +{ + memset(cmd, 0, sizeof(*cmd)); + cmd->argv = argv; + cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; + cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; + cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; +} + +int run_command_v_opt(const char **argv, int opt) +{ + struct child_process cmd; + prepare_run_command_v_opt(&cmd, argv, opt); + return run_command(&cmd); +} + +int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env) +{ + struct child_process cmd; + prepare_run_command_v_opt(&cmd, argv, opt); + cmd.dir = dir; + cmd.env = env; + return run_command(&cmd); +} + +#ifdef __MINGW32__ +static __stdcall unsigned run_thread(void *data) +{ + struct async *async = data; + return async->proc(async->fd_for_proc, async->data); +} +#endif + +int start_async(struct async *async) +{ + int pipe_out[2]; + + if (pipe(pipe_out) < 0) + return error("cannot create pipe: %s", strerror(errno)); + async->out = pipe_out[0]; + +#ifndef __MINGW32__ + /* Flush stdio before fork() to avoid cloning buffers */ + fflush(NULL); + + async->pid = fork(); + if (async->pid < 0) { + error("fork (async) failed: %s", strerror(errno)); + close_pair(pipe_out); + return -1; + } + if (!async->pid) { + close(pipe_out[0]); + exit(!!async->proc(pipe_out[1], async->data)); + } + close(pipe_out[1]); +#else + async->fd_for_proc = pipe_out[1]; + async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); + if (!async->tid) { + error("cannot create thread: %s", strerror(errno)); + close_pair(pipe_out); + return -1; + } +#endif + return 0; +} + +int finish_async(struct async *async) +{ +#ifndef __MINGW32__ + int ret = 0; + + if (wait_or_whine(async->pid)) + ret = error("waitpid (async) failed"); +#else + DWORD ret = 0; + if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) + ret = error("waiting for thread failed: %lu", GetLastError()); + else if (!GetExitCodeThread(async->tid, &ret)) + ret = error("cannot get thread exit code: %lu", GetLastError()); + CloseHandle(async->tid); +#endif + return ret; +} + +int run_hook(const char *index_file, const char *name, ...) +{ + struct child_process hook; + const char **argv = NULL, *env[2]; + char index[PATH_MAX]; + va_list args; + int ret; + size_t i = 0, alloc = 0; + + if (access(perf_path("hooks/%s", name), X_OK) < 0) + return 0; + + va_start(args, name); + ALLOC_GROW(argv, i + 1, alloc); + argv[i++] = perf_path("hooks/%s", name); + while (argv[i-1]) { + ALLOC_GROW(argv, i + 1, alloc); + argv[i++] = va_arg(args, const char *); + } + va_end(args); + + memset(&hook, 0, sizeof(hook)); + hook.argv = argv; + hook.no_stdin = 1; + hook.stdout_to_stderr = 1; + if (index_file) { + snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); + env[0] = index; + env[1] = NULL; + hook.env = env; + } + + ret = start_command(&hook); + free(argv); + if (ret) { + warning("Could not spawn %s", argv[0]); + return ret; + } + ret = finish_command(&hook); + if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) + warning("%s exited due to uncaught signal", argv[0]); + + return ret; +} diff --git a/Documentation/perf_counter/util/run-command.h b/Documentation/perf_counter/util/run-command.h new file mode 100644 index 0000000..328289f --- /dev/null +++ b/Documentation/perf_counter/util/run-command.h @@ -0,0 +1,93 @@ +#ifndef RUN_COMMAND_H +#define RUN_COMMAND_H + +enum { + ERR_RUN_COMMAND_FORK = 10000, + ERR_RUN_COMMAND_EXEC, + ERR_RUN_COMMAND_PIPE, + ERR_RUN_COMMAND_WAITPID, + ERR_RUN_COMMAND_WAITPID_WRONG_PID, + ERR_RUN_COMMAND_WAITPID_SIGNAL, + ERR_RUN_COMMAND_WAITPID_NOEXIT, +}; +#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) + +struct child_process { + const char **argv; + pid_t pid; + /* + * Using .in, .out, .err: + * - Specify 0 for no redirections (child inherits stdin, stdout, + * stderr from parent). + * - Specify -1 to have a pipe allocated as follows: + * .in: returns the writable pipe end; parent writes to it, + * the readable pipe end becomes child's stdin + * .out, .err: returns the readable pipe end; parent reads from + * it, the writable pipe end becomes child's stdout/stderr + * The caller of start_command() must close the returned FDs + * after it has completed reading from/writing to it! + * - Specify > 0 to set a channel to a particular FD as follows: + * .in: a readable FD, becomes child's stdin + * .out: a writable FD, becomes child's stdout/stderr + * .err > 0 not supported + * The specified FD is closed by start_command(), even in case + * of errors! + */ + int in; + int out; + int err; + const char *dir; + const char *const *env; + unsigned no_stdin:1; + unsigned no_stdout:1; + unsigned no_stderr:1; + unsigned perf_cmd:1; /* if this is to be perf sub-command */ + unsigned stdout_to_stderr:1; + void (*preexec_cb)(void); +}; + +int start_command(struct child_process *); +int finish_command(struct child_process *); +int run_command(struct child_process *); + +extern int run_hook(const char *index_file, const char *name, ...); + +#define RUN_COMMAND_NO_STDIN 1 +#define RUN_PERF_CMD 2 /*If this is to be perf sub-command */ +#define RUN_COMMAND_STDOUT_TO_STDERR 4 +int run_command_v_opt(const char **argv, int opt); + +/* + * env (the environment) is to be formatted like environ: "VAR=VALUE". + * To unset an environment variable use just "VAR". + */ +int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); + +/* + * The purpose of the following functions is to feed a pipe by running + * a function asynchronously and providing output that the caller reads. + * + * It is expected that no synchronization and mutual exclusion between + * the caller and the feed function is necessary so that the function + * can run in a thread without interfering with the caller. + */ +struct async { + /* + * proc writes to fd and closes it; + * returns 0 on success, non-zero on failure + */ + int (*proc)(int fd, void *data); + void *data; + int out; /* caller reads from here and closes it */ +#ifndef __MINGW32__ + pid_t pid; +#else + HANDLE tid; + int fd_for_proc; +#endif +}; + +int start_async(struct async *async); +int finish_async(struct async *async); + +#endif diff --git a/Documentation/perf_counter/util/strbuf.c b/Documentation/perf_counter/util/strbuf.c new file mode 100644 index 0000000..eaba093 --- /dev/null +++ b/Documentation/perf_counter/util/strbuf.c @@ -0,0 +1,359 @@ +#include "cache.h" + +int prefixcmp(const char *str, const char *prefix) +{ + for (; ; str++, prefix++) + if (!*prefix) + return 0; + else if (*str != *prefix) + return (unsigned char)*prefix - (unsigned char)*str; +} + +/* + * Used as the default ->buf value, so that people can always assume + * buf is non NULL and ->buf is NUL terminated even for a freshly + * initialized strbuf. + */ +char strbuf_slopbuf[1]; + +void strbuf_init(struct strbuf *sb, size_t hint) +{ + sb->alloc = sb->len = 0; + sb->buf = strbuf_slopbuf; + if (hint) + strbuf_grow(sb, hint); +} + +void strbuf_release(struct strbuf *sb) +{ + if (sb->alloc) { + free(sb->buf); + strbuf_init(sb, 0); + } +} + +char *strbuf_detach(struct strbuf *sb, size_t *sz) +{ + char *res = sb->alloc ? sb->buf : NULL; + if (sz) + *sz = sb->len; + strbuf_init(sb, 0); + return res; +} + +void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) +{ + strbuf_release(sb); + sb->buf = buf; + sb->len = len; + sb->alloc = alloc; + strbuf_grow(sb, 0); + sb->buf[sb->len] = '\0'; +} + +void strbuf_grow(struct strbuf *sb, size_t extra) +{ + if (sb->len + extra + 1 <= sb->len) + die("you want to use way too much memory"); + if (!sb->alloc) + sb->buf = NULL; + ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); +} + +void strbuf_trim(struct strbuf *sb) +{ + char *b = sb->buf; + while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) + sb->len--; + while (sb->len > 0 && isspace(*b)) { + b++; + sb->len--; + } + memmove(sb->buf, b, sb->len); + sb->buf[sb->len] = '\0'; +} +void strbuf_rtrim(struct strbuf *sb) +{ + while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) + sb->len--; + sb->buf[sb->len] = '\0'; +} + +void strbuf_ltrim(struct strbuf *sb) +{ + char *b = sb->buf; + while (sb->len > 0 && isspace(*b)) { + b++; + sb->len--; + } + memmove(sb->buf, b, sb->len); + sb->buf[sb->len] = '\0'; +} + +void strbuf_tolower(struct strbuf *sb) +{ + int i; + for (i = 0; i < sb->len; i++) + sb->buf[i] = tolower(sb->buf[i]); +} + +struct strbuf **strbuf_split(const struct strbuf *sb, int delim) +{ + int alloc = 2, pos = 0; + char *n, *p; + struct strbuf **ret; + struct strbuf *t; + + ret = calloc(alloc, sizeof(struct strbuf *)); + p = n = sb->buf; + while (n < sb->buf + sb->len) { + int len; + n = memchr(n, delim, sb->len - (n - sb->buf)); + if (pos + 1 >= alloc) { + alloc = alloc * 2; + ret = realloc(ret, sizeof(struct strbuf *) * alloc); + } + if (!n) + n = sb->buf + sb->len - 1; + len = n - p + 1; + t = malloc(sizeof(struct strbuf)); + strbuf_init(t, len); + strbuf_add(t, p, len); + ret[pos] = t; + ret[++pos] = NULL; + p = ++n; + } + return ret; +} + +void strbuf_list_free(struct strbuf **sbs) +{ + struct strbuf **s = sbs; + + while (*s) { + strbuf_release(*s); + free(*s++); + } + free(sbs); +} + +int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) +{ + int len = a->len < b->len ? a->len: b->len; + int cmp = memcmp(a->buf, b->buf, len); + if (cmp) + return cmp; + return a->len < b->len ? -1: a->len != b->len; +} + +void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, + const void *data, size_t dlen) +{ + if (pos + len < pos) + die("you want to use way too much memory"); + if (pos > sb->len) + die("`pos' is too far after the end of the buffer"); + if (pos + len > sb->len) + die("`pos + len' is too far after the end of the buffer"); + + if (dlen >= len) + strbuf_grow(sb, dlen - len); + memmove(sb->buf + pos + dlen, + sb->buf + pos + len, + sb->len - pos - len); + memcpy(sb->buf + pos, data, dlen); + strbuf_setlen(sb, sb->len + dlen - len); +} + +void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len) +{ + strbuf_splice(sb, pos, 0, data, len); +} + +void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) +{ + strbuf_splice(sb, pos, len, NULL, 0); +} + +void strbuf_add(struct strbuf *sb, const void *data, size_t len) +{ + strbuf_grow(sb, len); + memcpy(sb->buf + sb->len, data, len); + strbuf_setlen(sb, sb->len + len); +} + +void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) +{ + strbuf_grow(sb, len); + memcpy(sb->buf + sb->len, sb->buf + pos, len); + strbuf_setlen(sb, sb->len + len); +} + +void strbuf_addf(struct strbuf *sb, const char *fmt, ...) +{ + int len; + va_list ap; + + if (!strbuf_avail(sb)) + strbuf_grow(sb, 64); + va_start(ap, fmt); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + va_end(ap); + if (len < 0) + die("your vsnprintf is broken"); + if (len > strbuf_avail(sb)) { + strbuf_grow(sb, len); + va_start(ap, fmt); + len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); + va_end(ap); + if (len > strbuf_avail(sb)) { + die("this should not happen, your snprintf is broken"); + } + } + strbuf_setlen(sb, sb->len + len); +} + +void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, + void *context) +{ + for (;;) { + const char *percent; + size_t consumed; + + percent = strchrnul(format, '%'); + strbuf_add(sb, format, percent - format); + if (!*percent) + break; + format = percent + 1; + + consumed = fn(sb, format, context); + if (consumed) + format += consumed; + else + strbuf_addch(sb, '%'); + } +} + +size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, + void *context) +{ + struct strbuf_expand_dict_entry *e = context; + size_t len; + + for (; e->placeholder && (len = strlen(e->placeholder)); e++) { + if (!strncmp(placeholder, e->placeholder, len)) { + if (e->value) + strbuf_addstr(sb, e->value); + return len; + } + } + return 0; +} + +size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) +{ + size_t res; + size_t oldalloc = sb->alloc; + + strbuf_grow(sb, size); + res = fread(sb->buf + sb->len, 1, size, f); + if (res > 0) + strbuf_setlen(sb, sb->len + res); + else if (res < 0 && oldalloc == 0) + strbuf_release(sb); + return res; +} + +ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) +{ + size_t oldlen = sb->len; + size_t oldalloc = sb->alloc; + + strbuf_grow(sb, hint ? hint : 8192); + for (;;) { + ssize_t cnt; + + cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); + if (cnt < 0) { + if (oldalloc == 0) + strbuf_release(sb); + else + strbuf_setlen(sb, oldlen); + return -1; + } + if (!cnt) + break; + sb->len += cnt; + strbuf_grow(sb, 8192); + } + + sb->buf[sb->len] = '\0'; + return sb->len - oldlen; +} + +#define STRBUF_MAXLINK (2*PATH_MAX) + +int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) +{ + size_t oldalloc = sb->alloc; + + if (hint < 32) + hint = 32; + + while (hint < STRBUF_MAXLINK) { + int len; + + strbuf_grow(sb, hint); + len = readlink(path, sb->buf, hint); + if (len < 0) { + if (errno != ERANGE) + break; + } else if (len < hint) { + strbuf_setlen(sb, len); + return 0; + } + + /* .. the buffer was too small - try again */ + hint *= 2; + } + if (oldalloc == 0) + strbuf_release(sb); + return -1; +} + +int strbuf_getline(struct strbuf *sb, FILE *fp, int term) +{ + int ch; + + strbuf_grow(sb, 0); + if (feof(fp)) + return EOF; + + strbuf_reset(sb); + while ((ch = fgetc(fp)) != EOF) { + if (ch == term) + break; + strbuf_grow(sb, 1); + sb->buf[sb->len++] = ch; + } + if (ch == EOF && sb->len == 0) + return EOF; + + sb->buf[sb->len] = '\0'; + return 0; +} + +int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) +{ + int fd, len; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + len = strbuf_read(sb, fd, hint); + close(fd); + if (len < 0) + return -1; + + return len; +} diff --git a/Documentation/perf_counter/util/strbuf.h b/Documentation/perf_counter/util/strbuf.h new file mode 100644 index 0000000..9ee908a --- /dev/null +++ b/Documentation/perf_counter/util/strbuf.h @@ -0,0 +1,137 @@ +#ifndef STRBUF_H +#define STRBUF_H + +/* + * Strbuf's can be use in many ways: as a byte array, or to store arbitrary + * long, overflow safe strings. + * + * Strbufs has some invariants that are very important to keep in mind: + * + * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to + * build complex strings/buffers whose final size isn't easily known. + * + * It is NOT legal to copy the ->buf pointer away. + * `strbuf_detach' is the operation that detachs a buffer from its shell + * while keeping the shell valid wrt its invariants. + * + * 2. the ->buf member is a byte array that has at least ->len + 1 bytes + * allocated. The extra byte is used to store a '\0', allowing the ->buf + * member to be a valid C-string. Every strbuf function ensure this + * invariant is preserved. + * + * Note that it is OK to "play" with the buffer directly if you work it + * that way: + * + * strbuf_grow(sb, SOME_SIZE); + * ... Here, the memory array starting at sb->buf, and of length + * ... strbuf_avail(sb) is all yours, and you are sure that + * ... strbuf_avail(sb) is at least SOME_SIZE. + * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); + * + * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). + * + * Doing so is safe, though if it has to be done in many places, adding the + * missing API to the strbuf module is the way to go. + * + * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 + * even if it's true in the current implementation. Alloc is somehow a + * "private" member that should not be messed with. + */ + +#include + +extern char strbuf_slopbuf[]; +struct strbuf { + size_t alloc; + size_t len; + char *buf; +}; + +#define STRBUF_INIT { 0, 0, strbuf_slopbuf } + +/*----- strbuf life cycle -----*/ +extern void strbuf_init(struct strbuf *, size_t); +extern void strbuf_release(struct strbuf *); +extern char *strbuf_detach(struct strbuf *, size_t *); +extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); +static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { + struct strbuf tmp = *a; + *a = *b; + *b = tmp; +} + +/*----- strbuf size related -----*/ +static inline size_t strbuf_avail(const struct strbuf *sb) { + return sb->alloc ? sb->alloc - sb->len - 1 : 0; +} + +extern void strbuf_grow(struct strbuf *, size_t); + +static inline void strbuf_setlen(struct strbuf *sb, size_t len) { + if (!sb->alloc) + strbuf_grow(sb, 0); + assert(len < sb->alloc); + sb->len = len; + sb->buf[len] = '\0'; +} +#define strbuf_reset(sb) strbuf_setlen(sb, 0) + +/*----- content related -----*/ +extern void strbuf_trim(struct strbuf *); +extern void strbuf_rtrim(struct strbuf *); +extern void strbuf_ltrim(struct strbuf *); +extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); +extern void strbuf_tolower(struct strbuf *); + +extern struct strbuf **strbuf_split(const struct strbuf *, int delim); +extern void strbuf_list_free(struct strbuf **); + +/*----- add data in your buffer -----*/ +static inline void strbuf_addch(struct strbuf *sb, int c) { + strbuf_grow(sb, 1); + sb->buf[sb->len++] = c; + sb->buf[sb->len] = '\0'; +} + +extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); +extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); + +/* splice pos..pos+len with given data */ +extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, + const void *, size_t); + +extern void strbuf_add(struct strbuf *, const void *, size_t); +static inline void strbuf_addstr(struct strbuf *sb, const char *s) { + strbuf_add(sb, s, strlen(s)); +} +static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { + strbuf_add(sb, sb2->buf, sb2->len); +} +extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); + +typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); +extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); +struct strbuf_expand_dict_entry { + const char *placeholder; + const char *value; +}; +extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); + +__attribute__((format(printf,2,3))) +extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); + +extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); +/* XXX: if read fails, any partial read is undone */ +extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); +extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); +extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); + +extern int strbuf_getline(struct strbuf *, FILE *, int); + +extern void stripspace(struct strbuf *buf, int skip_comments); +extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); + +extern int strbuf_branchname(struct strbuf *sb, const char *name); +extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); + +#endif /* STRBUF_H */ diff --git a/Documentation/perf_counter/util/usage.c b/Documentation/perf_counter/util/usage.c new file mode 100644 index 0000000..7a10421 --- /dev/null +++ b/Documentation/perf_counter/util/usage.c @@ -0,0 +1,80 @@ +/* + * GIT - The information manager from hell + * + * Copyright (C) Linus Torvalds, 2005 + */ +#include "util.h" + +static void report(const char *prefix, const char *err, va_list params) +{ + char msg[1024]; + vsnprintf(msg, sizeof(msg), err, params); + fprintf(stderr, "%s%s\n", prefix, msg); +} + +static NORETURN void usage_builtin(const char *err) +{ + fprintf(stderr, "usage: %s\n", err); + exit(129); +} + +static NORETURN void die_builtin(const char *err, va_list params) +{ + report("fatal: ", err, params); + exit(128); +} + +static void error_builtin(const char *err, va_list params) +{ + report("error: ", err, params); +} + +static void warn_builtin(const char *warn, va_list params) +{ + report("warning: ", warn, params); +} + +/* If we are in a dlopen()ed .so write to a global variable would segfault + * (ugh), so keep things static. */ +static void (*usage_routine)(const char *err) NORETURN = usage_builtin; +static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; +static void (*error_routine)(const char *err, va_list params) = error_builtin; +static void (*warn_routine)(const char *err, va_list params) = warn_builtin; + +void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) +{ + die_routine = routine; +} + +void usage(const char *err) +{ + usage_routine(err); +} + +void die(const char *err, ...) +{ + va_list params; + + va_start(params, err); + die_routine(err, params); + va_end(params); +} + +int error(const char *err, ...) +{ + va_list params; + + va_start(params, err); + error_routine(err, params); + va_end(params); + return -1; +} + +void warning(const char *warn, ...) +{ + va_list params; + + va_start(params, warn); + warn_routine(warn, params); + va_end(params); +} diff --git a/Documentation/perf_counter/util/util.h b/Documentation/perf_counter/util/util.h new file mode 100644 index 0000000..36e40c3 --- /dev/null +++ b/Documentation/perf_counter/util/util.h @@ -0,0 +1,408 @@ +#ifndef GIT_COMPAT_UTIL_H +#define GIT_COMPAT_UTIL_H + +#define _FILE_OFFSET_BITS 64 + +#ifndef FLEX_ARRAY +/* + * See if our compiler is known to support flexible array members. + */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +# define FLEX_ARRAY /* empty */ +#elif defined(__GNUC__) +# if (__GNUC__ >= 3) +# define FLEX_ARRAY /* empty */ +# else +# define FLEX_ARRAY 0 /* older GNU extension */ +# endif +#endif + +/* + * Otherwise, default to safer but a bit wasteful traditional style + */ +#ifndef FLEX_ARRAY +# define FLEX_ARRAY 1 +#endif +#endif + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +#ifdef __GNUC__ +#define TYPEOF(x) (__typeof__(x)) +#else +#define TYPEOF(x) +#endif + +#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) +#define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ + +/* Approximation of the length of the decimal representation of this type. */ +#define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) + +#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) +#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ +#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ +#endif +#define _ALL_SOURCE 1 +#define _GNU_SOURCE 1 +#define _BSD_SOURCE 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef __MINGW32__ +#include +#include +#include +#include +#ifndef NO_SYS_SELECT_H +#include +#endif +#include +#include +#include +#include +#include +#include +#if defined(__CYGWIN__) +#undef _XOPEN_SOURCE +#include +#define _XOPEN_SOURCE 600 +#include "compat/cygwin.h" +#else +#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ +#include +#define _ALL_SOURCE 1 +#endif +#else /* __MINGW32__ */ +/* pull in Windows compatibility stuff */ +#include "compat/mingw.h" +#endif /* __MINGW32__ */ + +#ifndef NO_ICONV +#include +#endif + +#ifndef NO_OPENSSL +#include +#include +#endif + +/* On most systems would have given us this, but + * not on some systems (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#ifndef PRIuMAX +#define PRIuMAX "llu" +#endif + +#ifndef PRIu32 +#define PRIu32 "u" +#endif + +#ifndef PRIx32 +#define PRIx32 "x" +#endif + +#ifndef PATH_SEP +#define PATH_SEP ':' +#endif + +#ifndef STRIP_EXTENSION +#define STRIP_EXTENSION "" +#endif + +#ifndef has_dos_drive_prefix +#define has_dos_drive_prefix(path) 0 +#endif + +#ifndef is_dir_sep +#define is_dir_sep(c) ((c) == '/') +#endif + +#ifdef __GNUC__ +#define NORETURN __attribute__((__noreturn__)) +#else +#define NORETURN +#ifndef __attribute__ +#define __attribute__(x) +#endif +#endif + +/* General helper functions */ +extern void usage(const char *err) NORETURN; +extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); +extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); +extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); + +extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); + +extern int prefixcmp(const char *str, const char *prefix); +extern time_t tm_to_time_t(const struct tm *tm); + +static inline const char *skip_prefix(const char *str, const char *prefix) +{ + size_t len = strlen(prefix); + return strncmp(str, prefix, len) ? NULL : str + len; +} + +#if defined(NO_MMAP) || defined(USE_WIN32_MMAP) + +#ifndef PROT_READ +#define PROT_READ 1 +#define PROT_WRITE 2 +#define MAP_PRIVATE 1 +#define MAP_FAILED ((void*)-1) +#endif + +#define mmap git_mmap +#define munmap git_munmap +extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); +extern int git_munmap(void *start, size_t length); + +#else /* NO_MMAP || USE_WIN32_MMAP */ + +#include + +#endif /* NO_MMAP || USE_WIN32_MMAP */ + +#ifdef NO_MMAP + +/* This value must be multiple of (pagesize * 2) */ +#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) + +#else /* NO_MMAP */ + +/* This value must be multiple of (pagesize * 2) */ +#define DEFAULT_PACKED_GIT_WINDOW_SIZE \ + (sizeof(void*) >= 8 \ + ? 1 * 1024 * 1024 * 1024 \ + : 32 * 1024 * 1024) + +#endif /* NO_MMAP */ + +#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT +#define on_disk_bytes(st) ((st).st_size) +#else +#define on_disk_bytes(st) ((st).st_blocks * 512) +#endif + +#define DEFAULT_PACKED_GIT_LIMIT \ + ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) + +#ifdef NO_PREAD +#define pread git_pread +extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); +#endif +/* + * Forward decl that will remind us if its twin in cache.h changes. + * This function is used in compat/pread.c. But we can't include + * cache.h there. + */ +extern ssize_t read_in_full(int fd, void *buf, size_t count); + +#ifdef NO_SETENV +#define setenv gitsetenv +extern int gitsetenv(const char *, const char *, int); +#endif + +#ifdef NO_MKDTEMP +#define mkdtemp gitmkdtemp +extern char *gitmkdtemp(char *); +#endif + +#ifdef NO_UNSETENV +#define unsetenv gitunsetenv +extern void gitunsetenv(const char *); +#endif + +#ifdef NO_STRCASESTR +#define strcasestr gitstrcasestr +extern char *gitstrcasestr(const char *haystack, const char *needle); +#endif + +#ifdef NO_STRLCPY +#define strlcpy gitstrlcpy +extern size_t gitstrlcpy(char *, const char *, size_t); +#endif + +#ifdef NO_STRTOUMAX +#define strtoumax gitstrtoumax +extern uintmax_t gitstrtoumax(const char *, char **, int); +#endif + +#ifdef NO_HSTRERROR +#define hstrerror githstrerror +extern const char *githstrerror(int herror); +#endif + +#ifdef NO_MEMMEM +#define memmem gitmemmem +void *gitmemmem(const void *haystack, size_t haystacklen, + const void *needle, size_t needlelen); +#endif + +#ifdef FREAD_READS_DIRECTORIES +#ifdef fopen +#undef fopen +#endif +#define fopen(a,b) git_fopen(a,b) +extern FILE *git_fopen(const char*, const char*); +#endif + +#ifdef SNPRINTF_RETURNS_BOGUS +#define snprintf git_snprintf +extern int git_snprintf(char *str, size_t maxsize, + const char *format, ...); +#define vsnprintf git_vsnprintf +extern int git_vsnprintf(char *str, size_t maxsize, + const char *format, va_list ap); +#endif + +#ifdef __GLIBC_PREREQ +#if __GLIBC_PREREQ(2, 1) +#define HAVE_STRCHRNUL +#endif +#endif + +#ifndef HAVE_STRCHRNUL +#define strchrnul gitstrchrnul +static inline char *gitstrchrnul(const char *s, int c) +{ + while (*s && *s != c) + s++; + return (char *)s; +} +#endif + +/* + * Wrappers: + */ +extern char *xstrdup(const char *str); +extern void *xmalloc(size_t size); +extern void *xmemdupz(const void *data, size_t len); +extern char *xstrndup(const char *str, size_t len); +extern void *xrealloc(void *ptr, size_t size); +extern void *xcalloc(size_t nmemb, size_t size); +extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); +extern ssize_t xread(int fd, void *buf, size_t len); +extern ssize_t xwrite(int fd, const void *buf, size_t len); +extern int xdup(int fd); +extern FILE *xfdopen(int fd, const char *mode); +static inline size_t xsize_t(off_t len) +{ + return (size_t)len; +} + +static inline int has_extension(const char *filename, const char *ext) +{ + size_t len = strlen(filename); + size_t extlen = strlen(ext); + return len > extlen && !memcmp(filename + len - extlen, ext, extlen); +} + +/* Sane ctype - no locale, and works with signed chars */ +#undef isascii +#undef isspace +#undef isdigit +#undef isalpha +#undef isalnum +#undef tolower +#undef toupper +extern unsigned char sane_ctype[256]; +#define GIT_SPACE 0x01 +#define GIT_DIGIT 0x02 +#define GIT_ALPHA 0x04 +#define GIT_GLOB_SPECIAL 0x08 +#define GIT_REGEX_SPECIAL 0x10 +#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) +#define isascii(x) (((x) & ~0x7f) == 0) +#define isspace(x) sane_istest(x,GIT_SPACE) +#define isdigit(x) sane_istest(x,GIT_DIGIT) +#define isalpha(x) sane_istest(x,GIT_ALPHA) +#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) +#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) +#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) +#define tolower(x) sane_case((unsigned char)(x), 0x20) +#define toupper(x) sane_case((unsigned char)(x), 0) + +static inline int sane_case(int x, int high) +{ + if (sane_istest(x, GIT_ALPHA)) + x = (x & ~0x20) | high; + return x; +} + +static inline int strtoul_ui(char const *s, int base, unsigned int *result) +{ + unsigned long ul; + char *p; + + errno = 0; + ul = strtoul(s, &p, base); + if (errno || *p || p == s || (unsigned int) ul != ul) + return -1; + *result = ul; + return 0; +} + +static inline int strtol_i(char const *s, int base, int *result) +{ + long ul; + char *p; + + errno = 0; + ul = strtol(s, &p, base); + if (errno || *p || p == s || (int) ul != ul) + return -1; + *result = ul; + return 0; +} + +#ifdef INTERNAL_QSORT +void git_qsort(void *base, size_t nmemb, size_t size, + int(*compar)(const void *, const void *)); +#define qsort git_qsort +#endif + +#ifndef DIR_HAS_BSD_GROUP_SEMANTICS +# define FORCE_DIR_SET_GID S_ISGID +#else +# define FORCE_DIR_SET_GID 0 +#endif + +#ifdef NO_NSEC +#undef USE_NSEC +#define ST_CTIME_NSEC(st) 0 +#define ST_MTIME_NSEC(st) 0 +#else +#ifdef USE_ST_TIMESPEC +#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) +#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) +#else +#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) +#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) +#endif +#endif + +#endif diff --git a/Documentation/perf_counter/util/wrapper.c b/Documentation/perf_counter/util/wrapper.c new file mode 100644 index 0000000..6350d65 --- /dev/null +++ b/Documentation/perf_counter/util/wrapper.c @@ -0,0 +1,206 @@ +/* + * Various trivial helper wrappers around standard functions + */ +#include "cache.h" + +/* + * There's no pack memory to release - but stay close to the Git + * version so wrap this away: + */ +static inline void release_pack_memory(size_t size, int flag) +{ +} + +char *xstrdup(const char *str) +{ + char *ret = strdup(str); + if (!ret) { + release_pack_memory(strlen(str) + 1, -1); + ret = strdup(str); + if (!ret) + die("Out of memory, strdup failed"); + } + return ret; +} + +void *xmalloc(size_t size) +{ + void *ret = malloc(size); + if (!ret && !size) + ret = malloc(1); + if (!ret) { + release_pack_memory(size, -1); + ret = malloc(size); + if (!ret && !size) + ret = malloc(1); + if (!ret) + die("Out of memory, malloc failed"); + } +#ifdef XMALLOC_POISON + memset(ret, 0xA5, size); +#endif + return ret; +} + +/* + * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of + * "data" to the allocated memory, zero terminates the allocated memory, + * and returns a pointer to the allocated memory. If the allocation fails, + * the program dies. + */ +void *xmemdupz(const void *data, size_t len) +{ + char *p = xmalloc(len + 1); + memcpy(p, data, len); + p[len] = '\0'; + return p; +} + +char *xstrndup(const char *str, size_t len) +{ + char *p = memchr(str, '\0', len); + return xmemdupz(str, p ? p - str : len); +} + +void *xrealloc(void *ptr, size_t size) +{ + void *ret = realloc(ptr, size); + if (!ret && !size) + ret = realloc(ptr, 1); + if (!ret) { + release_pack_memory(size, -1); + ret = realloc(ptr, size); + if (!ret && !size) + ret = realloc(ptr, 1); + if (!ret) + die("Out of memory, realloc failed"); + } + return ret; +} + +void *xcalloc(size_t nmemb, size_t size) +{ + void *ret = calloc(nmemb, size); + if (!ret && (!nmemb || !size)) + ret = calloc(1, 1); + if (!ret) { + release_pack_memory(nmemb * size, -1); + ret = calloc(nmemb, size); + if (!ret && (!nmemb || !size)) + ret = calloc(1, 1); + if (!ret) + die("Out of memory, calloc failed"); + } + return ret; +} + +void *xmmap(void *start, size_t length, + int prot, int flags, int fd, off_t offset) +{ + void *ret = mmap(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) { + if (!length) + return NULL; + release_pack_memory(length, fd); + ret = mmap(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) + die("Out of memory? mmap failed: %s", strerror(errno)); + } + return ret; +} + +/* + * xread() is the same a read(), but it automatically restarts read() + * operations with a recoverable error (EAGAIN and EINTR). xread() + * DOES NOT GUARANTEE that "len" bytes is read even if the data is available. + */ +ssize_t xread(int fd, void *buf, size_t len) +{ + ssize_t nr; + while (1) { + nr = read(fd, buf, len); + if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) + continue; + return nr; + } +} + +/* + * xwrite() is the same a write(), but it automatically restarts write() + * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT + * GUARANTEE that "len" bytes is written even if the operation is successful. + */ +ssize_t xwrite(int fd, const void *buf, size_t len) +{ + ssize_t nr; + while (1) { + nr = write(fd, buf, len); + if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) + continue; + return nr; + } +} + +ssize_t read_in_full(int fd, void *buf, size_t count) +{ + char *p = buf; + ssize_t total = 0; + + while (count > 0) { + ssize_t loaded = xread(fd, p, count); + if (loaded <= 0) + return total ? total : loaded; + count -= loaded; + p += loaded; + total += loaded; + } + + return total; +} + +ssize_t write_in_full(int fd, const void *buf, size_t count) +{ + const char *p = buf; + ssize_t total = 0; + + while (count > 0) { + ssize_t written = xwrite(fd, p, count); + if (written < 0) + return -1; + if (!written) { + errno = ENOSPC; + return -1; + } + count -= written; + p += written; + total += written; + } + + return total; +} + +int xdup(int fd) +{ + int ret = dup(fd); + if (ret < 0) + die("dup failed: %s", strerror(errno)); + return ret; +} + +FILE *xfdopen(int fd, const char *mode) +{ + FILE *stream = fdopen(fd, mode); + if (stream == NULL) + die("Out of memory? fdopen failed: %s", strerror(errno)); + return stream; +} + +int xmkstemp(char *template) +{ + int fd; + + fd = mkstemp(template); + if (fd < 0) + die("Unable to create temporary file: %s", strerror(errno)); + return fd; +} diff --git a/Documentation/perf_counter/wrapper.c b/Documentation/perf_counter/wrapper.c deleted file mode 100644 index 6350d65..0000000 --- a/Documentation/perf_counter/wrapper.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Various trivial helper wrappers around standard functions - */ -#include "cache.h" - -/* - * There's no pack memory to release - but stay close to the Git - * version so wrap this away: - */ -static inline void release_pack_memory(size_t size, int flag) -{ -} - -char *xstrdup(const char *str) -{ - char *ret = strdup(str); - if (!ret) { - release_pack_memory(strlen(str) + 1, -1); - ret = strdup(str); - if (!ret) - die("Out of memory, strdup failed"); - } - return ret; -} - -void *xmalloc(size_t size) -{ - void *ret = malloc(size); - if (!ret && !size) - ret = malloc(1); - if (!ret) { - release_pack_memory(size, -1); - ret = malloc(size); - if (!ret && !size) - ret = malloc(1); - if (!ret) - die("Out of memory, malloc failed"); - } -#ifdef XMALLOC_POISON - memset(ret, 0xA5, size); -#endif - return ret; -} - -/* - * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of - * "data" to the allocated memory, zero terminates the allocated memory, - * and returns a pointer to the allocated memory. If the allocation fails, - * the program dies. - */ -void *xmemdupz(const void *data, size_t len) -{ - char *p = xmalloc(len + 1); - memcpy(p, data, len); - p[len] = '\0'; - return p; -} - -char *xstrndup(const char *str, size_t len) -{ - char *p = memchr(str, '\0', len); - return xmemdupz(str, p ? p - str : len); -} - -void *xrealloc(void *ptr, size_t size) -{ - void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - release_pack_memory(size, -1); - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } - return ret; -} - -void *xcalloc(size_t nmemb, size_t size) -{ - void *ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) { - release_pack_memory(nmemb * size, -1); - ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) - die("Out of memory, calloc failed"); - } - return ret; -} - -void *xmmap(void *start, size_t length, - int prot, int flags, int fd, off_t offset) -{ - void *ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) { - if (!length) - return NULL; - release_pack_memory(length, fd); - ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) - die("Out of memory? mmap failed: %s", strerror(errno)); - } - return ret; -} - -/* - * xread() is the same a read(), but it automatically restarts read() - * operations with a recoverable error (EAGAIN and EINTR). xread() - * DOES NOT GUARANTEE that "len" bytes is read even if the data is available. - */ -ssize_t xread(int fd, void *buf, size_t len) -{ - ssize_t nr; - while (1) { - nr = read(fd, buf, len); - if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) - continue; - return nr; - } -} - -/* - * xwrite() is the same a write(), but it automatically restarts write() - * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT - * GUARANTEE that "len" bytes is written even if the operation is successful. - */ -ssize_t xwrite(int fd, const void *buf, size_t len) -{ - ssize_t nr; - while (1) { - nr = write(fd, buf, len); - if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) - continue; - return nr; - } -} - -ssize_t read_in_full(int fd, void *buf, size_t count) -{ - char *p = buf; - ssize_t total = 0; - - while (count > 0) { - ssize_t loaded = xread(fd, p, count); - if (loaded <= 0) - return total ? total : loaded; - count -= loaded; - p += loaded; - total += loaded; - } - - return total; -} - -ssize_t write_in_full(int fd, const void *buf, size_t count) -{ - const char *p = buf; - ssize_t total = 0; - - while (count > 0) { - ssize_t written = xwrite(fd, p, count); - if (written < 0) - return -1; - if (!written) { - errno = ENOSPC; - return -1; - } - count -= written; - p += written; - total += written; - } - - return total; -} - -int xdup(int fd) -{ - int ret = dup(fd); - if (ret < 0) - die("dup failed: %s", strerror(errno)); - return ret; -} - -FILE *xfdopen(int fd, const char *mode) -{ - FILE *stream = fdopen(fd, mode); - if (stream == NULL) - die("Out of memory? fdopen failed: %s", strerror(errno)); - return stream; -} - -int xmkstemp(char *template) -{ - int fd; - - fd = mkstemp(template); - if (fd < 0) - die("Unable to create temporary file: %s", strerror(errno)); - return fd; -} -- cgit v0.10.2 From b2ba83ff4f4405cebc10884121ee71338a1a6c94 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 26 Apr 2009 23:38:08 -0700 Subject: x86: apic: Remove duplicated macros XAPIC_DEST_* is dupliicated to the one in apicdef.h [ Impact: cleanup ] Signed-off-by: Yinghai Lu LKML-Reference: <49F552D0.5050505@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 9cfe1f4..344eee4 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); } - -/* In clustered mode, the high nibble of APIC ID is a cluster number. - * The low nibble is a 4-bit bitmap. */ -#define XAPIC_DEST_CPUS_SHIFT 4 -#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) -#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) - #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) static const struct cpumask *summit_target_cpus(void) -- cgit v0.10.2 From e0e42142bab96404de535cceb85d6533d5ad7942 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 26 Apr 2009 23:39:38 -0700 Subject: x86: Use dmi check in apic_is_clustered() on 64-bit to mark the TSC unstable We will have systems with 2 and more sockets 8cores/2thread, but we treat them as multi chassis - while they could have a stable TSC domain. Use DMI check instead. [ Impact: do not turn possibly stable TSCs off incorrectly ] Signed-off-by: Yinghai Lu Cc: Ravikiran Thirumalai LKML-Reference: <49F5532A.5000802@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1386dbe..28f747d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2138,31 +2138,14 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ #ifdef CONFIG_X86_64 -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) + +static int __cpuinit apic_cluster_num(void) { int i, clusters, zeros; unsigned id; u16 *bios_cpu_apicid; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); bitmap_zero(clustermap, NUM_APIC_CLUSTERS); @@ -2198,18 +2181,67 @@ __cpuinit int apic_is_clustered_box(void) ++zeros; } - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) + return clusters; +} + +static int __cpuinitdata multi_checked; +static int __cpuinitdata multi; + +static int __cpuinit set_multi(const struct dmi_system_id *d) +{ + if (multi) + return 0; + printk(KERN_INFO "APIC: %s detected, Multi Chassis\n", d->ident); + multi = 1; + return 0; +} + +static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { + { + .callback = set_multi, + .ident = "IBM System Summit2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "IBM"), + DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), + }, + }, + {} +}; + +static void __cpuinit dmi_check_multi(void) +{ + if (multi_checked) + return; + + dmi_check_system(multi_dmi_table); + multi_checked = 1; +} + +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. + * Use DMI to check them + */ +__cpuinit int apic_is_clustered_box(void) +{ + dmi_check_multi(); + if (multi) return 1; + if (!is_vsmp_box()) + return 0; + /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. + * ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards */ - return (clusters > 2); + if (apic_cluster_num() > 1) + return 1; + + return 0; } #endif -- cgit v0.10.2 From 47c8a08bbe77ad3c06f63919a14b0f0b0cd54390 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 27 Apr 2009 17:34:39 +0900 Subject: sh: rtc-generic support. This adds rtc-generic support for SUPERH32. Signed-off-by: Paul Mundt diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 1647489..9db02ce 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -30,6 +30,7 @@ config SUPERH32 select HAVE_DYNAMIC_FTRACE select HAVE_ARCH_KGDB select ARCH_HIBERNATION_POSSIBLE if MMU + select RTC_LIB config SUPERH64 def_bool ARCH = "sh64" diff --git a/arch/sh/include/asm/rtc.h b/arch/sh/include/asm/rtc.h index f7b010d..52b0c2d 100644 --- a/arch/sh/include/asm/rtc.h +++ b/arch/sh/include/asm/rtc.h @@ -6,6 +6,17 @@ extern void (*board_time_init)(void); extern void (*rtc_sh_get_time)(struct timespec *); extern int (*rtc_sh_set_time)(const time_t); +/* some dummy definitions */ +#define RTC_BATT_BAD 0x100 /* battery bad */ +#define RTC_SQWE 0x08 /* enable square-wave output */ +#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ +#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ +#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ + +struct rtc_time; +unsigned int get_rtc_time(struct rtc_time *); +int set_rtc_time(struct rtc_time *); + #define RTC_CAP_4_DIGIT_YEAR (1 << 0) struct sh_rtc_platform_info { diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c index c770413..109409f 100644 --- a/arch/sh/kernel/time_32.c +++ b/arch/sh/kernel/time_32.c @@ -19,6 +19,7 @@ #include /* for rtc_lock */ #include #include +#include #include #include #include @@ -45,6 +46,28 @@ static int null_rtc_set_time(const time_t secs) void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; +unsigned int get_rtc_time(struct rtc_time *tm) +{ + if (rtc_sh_get_time != null_rtc_get_time) { + struct timespec tv; + + rtc_sh_get_time(&tv); + rtc_time_to_tm(tv.tv_sec, tm); + } + + return RTC_24H; +} +EXPORT_SYMBOL(get_rtc_time); + +int set_rtc_time(struct rtc_time *tm) +{ + unsigned long secs; + + rtc_tm_to_time(tm, &secs); + return rtc_sh_set_time(secs); +} +EXPORT_SYMBOL(set_rtc_time); + #ifndef CONFIG_GENERIC_TIME void do_gettimeofday(struct timeval *tv) { diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 4e9851f..277d35d 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -692,7 +692,7 @@ config RTC_DRV_GENERIC tristate "Generic RTC support" # Please consider writing a new RTC driver instead of using the generic # RTC abstraction - depends on PARISC || M68K || PPC + depends on PARISC || M68K || PPC || SUPERH32 help Say Y or M here to enable RTC support on systems using the generic RTC abstraction. If you do not know what you are doing, you should -- cgit v0.10.2 From c2e0090c668fc99f5be65fd9907da781cb6a2ef5 Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Mon, 27 Apr 2009 17:54:41 +0900 Subject: sh: mach-r2d: add physmap-flash support for R2D+ boards. The RTS7751R2D_1 boards only support 1MB of socket-mounted MBM29F040 flash, which we just leave alone as it's not terribly interesting. This adds support for the s29gl256p on the r2d+ boards that makes a bit more sense to expose to the user. Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/mach-r2d/setup.c b/arch/sh/boards/mach-r2d/setup.c index c585be0..a625ecb 100644 --- a/arch/sh/boards/mach-r2d/setup.c +++ b/arch/sh/boards/mach-r2d/setup.c @@ -10,6 +10,9 @@ */ #include #include +#include +#include +#include #include #include #include @@ -181,6 +184,50 @@ static struct platform_device sm501_device = { .resource = sm501_resources, }; +static struct mtd_partition r2d_partitions[] = { + { + .name = "U-Boot", + .offset = 0x00000000, + .size = 0x00040000, + .mask_flags = MTD_WRITEABLE, + }, { + .name = "Environment", + .offset = MTDPART_OFS_NXTBLK, + .size = 0x00040000, + .mask_flags = MTD_WRITEABLE, + }, { + .name = "Kernel", + .offset = MTDPART_OFS_NXTBLK, + .size = 0x001c0000, + }, { + .name = "Flash_FS", + .offset = MTDPART_OFS_NXTBLK, + .size = MTDPART_SIZ_FULL, + } +}; + +static struct physmap_flash_data flash_data = { + .width = 2, + .nr_parts = ARRAY_SIZE(r2d_partitions), + .parts = r2d_partitions, +}; + +static struct resource flash_resource = { + .start = 0x00000000, + .end = 0x02000000, + .flags = IORESOURCE_MEM, +}; + +static struct platform_device flash_device = { + .name = "physmap-flash", + .id = -1, + .resource = &flash_resource, + .num_resources = 1, + .dev = { + .platform_data = &flash_data, + }, +}; + static struct platform_device *rts7751r2d_devices[] __initdata = { &sm501_device, &heartbeat_device, @@ -203,6 +250,9 @@ static int __init rts7751r2d_devices_setup(void) if (register_trapped_io(&cf_trapped_io) == 0) platform_device_register(&cf_ide_device); + if (mach_is_r2d_plus()) + platform_device_register(&flash_device); + spi_register_board_info(spi_bus, ARRAY_SIZE(spi_bus)); return platform_add_devices(rts7751r2d_devices, diff --git a/arch/sh/configs/rts7751r2dplus_defconfig b/arch/sh/configs/rts7751r2dplus_defconfig index 2bc1c97..a860435 100644 --- a/arch/sh/configs/rts7751r2dplus_defconfig +++ b/arch/sh/configs/rts7751r2dplus_defconfig @@ -424,7 +424,92 @@ CONFIG_FIRMWARE_IN_KERNEL=y CONFIG_EXTRA_FIRMWARE="" # CONFIG_SYS_HYPERVISOR is not set # CONFIG_CONNECTOR is not set -# CONFIG_MTD is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +CONFIG_MTD_CONCAT=y +CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set +# CONFIG_MTD_REDBOOT_PARTS is not set +CONFIG_MTD_CMDLINE_PARTS=y +# CONFIG_MTD_AR7_PARTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +# CONFIG_MTD_BLKDEVS is not set +# CONFIG_MTD_BLOCK is not set +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=y +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_GEN_PROBE=y +# CONFIG_MTD_CFI_ADV_OPTIONS is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_CFI_INTELEXT is not set +CONFIG_MTD_CFI_AMDSTD=y +# CONFIG_MTD_CFI_STAA is not set +CONFIG_MTD_CFI_UTIL=y +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +CONFIG_MTD_PHYSMAP=y +# CONFIG_MTD_PHYSMAP_COMPAT is not set +# CONFIG_MTD_INTEL_VR_NOR is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_PMC551 is not set +# CONFIG_MTD_DATAFLASH is not set +# CONFIG_MTD_M25P80 is not set +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +# CONFIG_MTD_NAND is not set +# CONFIG_MTD_ONENAND is not set + +# +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set + +# +# UBI - Unsorted block images +# +# CONFIG_MTD_UBI is not set # CONFIG_PARPORT is not set CONFIG_BLK_DEV=y # CONFIG_BLK_CPQ_CISS_DA is not set -- cgit v0.10.2 From 788c9700e7855f8a8cc8875e30d2518b57385c20 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 26 Apr 2009 14:21:59 +0100 Subject: [ARM] Kconfig: sort ARM machine class support choice list by option name Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e02b893..a930e5c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -253,6 +253,14 @@ config ARCH_CLPS711X help Support for Cirrus Logic 711x/721x based boards. +config ARCH_GEMINI + bool "Cortina Systems Gemini" + select CPU_FA526 + select GENERIC_GPIO + select ARCH_REQUIRE_GPIOLIB + help + Support for the Cortina Systems Gemini family SoCs + config ARCH_EBSA110 bool "EBSA-110" select CPU_SA110 @@ -276,14 +284,6 @@ config ARCH_EP93XX help This enables support for the Cirrus EP93xx series of CPUs. -config ARCH_GEMINI - bool "Cortina Systems Gemini" - select CPU_FA526 - select GENERIC_GPIO - select ARCH_REQUIRE_GPIOLIB - help - Support for the Cortina Systems Gemini family SoCs - config ARCH_FOOTBRIDGE bool "FootBridge" select CPU_SA110 @@ -292,6 +292,16 @@ config ARCH_FOOTBRIDGE Support for systems based on the DC21285 companion chip ("FootBridge"), such as the Simtec CATS and the Rebel NetWinder. +config ARCH_MXC + bool "Freescale MXC/iMX-based" + select GENERIC_TIME + select GENERIC_CLOCKEVENTS + select ARCH_MTD_XIP + select GENERIC_GPIO + select ARCH_REQUIRE_GPIOLIB + help + Support for Freescale MXC/iMX-based family of processors + config ARCH_NETX bool "Hilscher NetX based" select CPU_ARM926T @@ -404,28 +414,6 @@ config ARCH_KIRKWOOD Support for the following Marvell Kirkwood series SoCs: 88F6180, 88F6192 and 88F6281. -config ARCH_KS8695 - bool "Micrel/Kendin KS8695" - select CPU_ARM922T - select GENERIC_GPIO - select ARCH_REQUIRE_GPIOLIB - help - Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based - System-on-Chip devices. - -config ARCH_NS9XXX - bool "NetSilicon NS9xxx" - select CPU_ARM926T - select GENERIC_GPIO - select GENERIC_TIME - select GENERIC_CLOCKEVENTS - select HAVE_CLK - help - Say Y here if you intend to run this kernel on a NetSilicon NS9xxx - System. - - - config ARCH_LOKI bool "Marvell Loki (88RC8480)" select CPU_FEROCEON @@ -447,16 +435,6 @@ config ARCH_MV78XX0 Support for the following Marvell MV78xx0 series SoCs: MV781x0, MV782x0. -config ARCH_MXC - bool "Freescale MXC/iMX-based" - select GENERIC_TIME - select GENERIC_CLOCKEVENTS - select ARCH_MTD_XIP - select GENERIC_GPIO - select ARCH_REQUIRE_GPIOLIB - help - Support for Freescale MXC/iMX-based family of processors - config ARCH_ORION5X bool "Marvell Orion" depends on MMU @@ -471,6 +449,49 @@ config ARCH_ORION5X Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182), Orion-2 (5281), Orion-1-90 (6183). +config ARCH_MMP + bool "Marvell PXA168/910" + depends on MMU + select GENERIC_GPIO + select ARCH_REQUIRE_GPIOLIB + select HAVE_CLK + select COMMON_CLKDEV + select GENERIC_TIME + select GENERIC_CLOCKEVENTS + select TICK_ONESHOT + select PLAT_PXA + help + Support for Marvell's PXA168/910 processor line. + +config ARCH_KS8695 + bool "Micrel/Kendin KS8695" + select CPU_ARM922T + select GENERIC_GPIO + select ARCH_REQUIRE_GPIOLIB + help + Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based + System-on-Chip devices. + +config ARCH_NS9XXX + bool "NetSilicon NS9xxx" + select CPU_ARM926T + select GENERIC_GPIO + select GENERIC_TIME + select GENERIC_CLOCKEVENTS + select HAVE_CLK + help + Say Y here if you intend to run this kernel on a NetSilicon NS9xxx + System. + + + +config ARCH_W90X900 + bool "Nuvoton W90X900 CPU" + select CPU_ARM926T + help + Support for Nuvoton (Winbond logic dept.) ARM9 processor,You + can login www.mcuos.com or www.nuvoton.com to know more. + config ARCH_PNX4008 bool "Philips Nexperia PNX4008 Mobile" select CPU_ARM926T @@ -495,19 +516,16 @@ config ARCH_PXA help Support for Intel/Marvell's PXA2xx/PXA3xx processor line. -config ARCH_MMP - bool "Marvell PXA168/910" - depends on MMU - select GENERIC_GPIO - select ARCH_REQUIRE_GPIOLIB - select HAVE_CLK - select COMMON_CLKDEV +config ARCH_MSM + bool "Qualcomm MSM" + select CPU_V6 select GENERIC_TIME select GENERIC_CLOCKEVENTS - select TICK_ONESHOT - select PLAT_PXA help - Support for Marvell's PXA168/910 processor line. + Support for Qualcomm MSM7K based systems. This runs on the ARM11 + apps processor of the MSM7K and depends on a shared memory + interface to the ARM9 modem processor which runs the baseband stack + and controls some vital subsystems (clock and power control, etc). config ARCH_RPC bool "RiscPC" @@ -598,24 +616,6 @@ config ARCH_OMAP help Support for TI's OMAP platform (OMAP1 and OMAP2). -config ARCH_MSM - bool "Qualcomm MSM" - select CPU_V6 - select GENERIC_TIME - select GENERIC_CLOCKEVENTS - help - Support for Qualcomm MSM7K based systems. This runs on the ARM11 - apps processor of the MSM7K and depends on a shared memory - interface to the ARM9 modem processor which runs the baseband stack - and controls some vital subsystems (clock and power control, etc). - -config ARCH_W90X900 - bool "Nuvoton W90X900 CPU" - select CPU_ARM926T - help - Support for Nuvoton (Winbond logic dept.) ARM9 processor,You - can login www.mcuos.com or www.nuvoton.com to know more. - endchoice source "arch/arm/mach-clps711x/Kconfig" -- cgit v0.10.2 From 9bef5de1e0f8915547124082e5c27c63cfa5c2fd Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Wed, 22 Apr 2009 23:51:15 +0100 Subject: [ARM] 5461/1: Freescale STMP platform support Header files for STMP37xx boards Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/mach-stmp37xx/include/mach/entry-macro.S b/arch/arm/mach-stmp37xx/include/mach/entry-macro.S new file mode 100644 index 0000000..fed2787 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/entry-macro.S @@ -0,0 +1,37 @@ +/* + * Low-level IRQ helper macros for Freescale STMP37XX + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + + .macro disable_fiq + .endm + + .macro get_irqnr_and_base, irqnr, irqstat, base, tmp + + mov \base, #0xf0000000 @ vm address of IRQ controller + ldr \irqnr, [\base, #0x30] @ HW_ICOLL_STAT + cmp \irqnr, #0x3f + movne \irqstat, #0 @ Ack this IRQ + strne \irqstat, [\base, #0x00]@ HW_ICOLL_VECTOR + moveqs \irqnr, #0 @ Zero flag set for no IRQ + + .endm + + .macro get_irqnr_preamble, base, tmp + .endm + + .macro arch_ret_to_user, tmp1, tmp2 + .endm diff --git a/arch/arm/mach-stmp37xx/include/mach/irqs.h b/arch/arm/mach-stmp37xx/include/mach/irqs.h new file mode 100644 index 0000000..98f1293 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/irqs.h @@ -0,0 +1,99 @@ +/* + * Freescale STMP37XX interrupts + * + * Copyright (C) 2005 Sigmatel Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef _ASM_ARCH_IRQS_H +#define _ASM_ARCH_IRQS_H + +#define IRQ_DEBUG_UART 0 +#define IRQ_COMMS_RX 1 +#define IRQ_COMMS_TX 1 +#define IRQ_SSP2_ERROR 2 +#define IRQ_VDD5V 3 +#define IRQ_HEADPHONE_SHORT 4 +#define IRQ_DAC_DMA 5 +#define IRQ_DAC_ERROR 6 +#define IRQ_ADC_DMA 7 +#define IRQ_ADC_ERROR 8 +#define IRQ_SPDIF_DMA 9 +#define IRQ_SAIF2_DMA 9 +#define IRQ_SPDIF_ERROR 10 +#define IRQ_SAIF1_IRQ 10 +#define IRQ_SAIF2_IRQ 10 +#define IRQ_USB_CTRL 11 +#define IRQ_USB_WAKEUP 12 +#define IRQ_GPMI_DMA 13 +#define IRQ_SSP1_DMA 14 +#define IRQ_SSP_ERROR 15 +#define IRQ_GPIO0 16 +#define IRQ_GPIO1 17 +#define IRQ_GPIO2 18 +#define IRQ_SAIF1_DMA 19 +#define IRQ_SSP2_DMA 20 +#define IRQ_ECC8_IRQ 21 +#define IRQ_RTC_ALARM 22 +#define IRQ_UARTAPP_TX_DMA 23 +#define IRQ_UARTAPP_INTERNAL 24 +#define IRQ_UARTAPP_RX_DMA 25 +#define IRQ_I2C_DMA 26 +#define IRQ_I2C_ERROR 27 +#define IRQ_TIMER0 28 +#define IRQ_TIMER1 29 +#define IRQ_TIMER2 30 +#define IRQ_TIMER3 31 +#define IRQ_BATT_BRNOUT 32 +#define IRQ_VDDD_BRNOUT 33 +#define IRQ_VDDIO_BRNOUT 34 +#define IRQ_VDD18_BRNOUT 35 +#define IRQ_TOUCH_DETECT 36 +#define IRQ_LRADC_CH0 37 +#define IRQ_LRADC_CH1 38 +#define IRQ_LRADC_CH2 39 +#define IRQ_LRADC_CH3 40 +#define IRQ_LRADC_CH4 41 +#define IRQ_LRADC_CH5 42 +#define IRQ_LRADC_CH6 43 +#define IRQ_LRADC_CH7 44 +#define IRQ_LCDIF_DMA 45 +#define IRQ_LCDIF_ERROR 46 +#define IRQ_DIGCTL_DEBUG_TRAP 47 +#define IRQ_RTC_1MSEC 48 +#define IRQ_DRI_DMA 49 +#define IRQ_DRI_ATTENTION 50 +#define IRQ_GPMI_ATTENTION 51 +#define IRQ_IR 52 +#define IRQ_DCP_VMI 53 +#define IRQ_DCP 54 +#define IRQ_RESERVED_55 55 +#define IRQ_RESERVED_56 56 +#define IRQ_RESERVED_57 57 +#define IRQ_RESERVED_58 58 +#define IRQ_RESERVED_59 59 +#define SW_IRQ_60 60 +#define SW_IRQ_61 61 +#define SW_IRQ_62 62 +#define SW_IRQ_63 63 + +#define NR_REAL_IRQS 64 +#define NR_IRQS (NR_REAL_IRQS + 32 * 3) + +/* TIMER and BRNOUT are FIQ capable */ +#define FIQ_START IRQ_TIMER0 + +/* Hard disk IRQ is a GPMI attention IRQ */ +#define IRQ_HARDDISK IRQ_GPMI_ATTENTION + +#endif /* _ASM_ARCH_IRQS_H */ diff --git a/arch/arm/mach-stmp37xx/include/mach/pins.h b/arch/arm/mach-stmp37xx/include/mach/pins.h new file mode 100644 index 0000000..d56de0c --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/pins.h @@ -0,0 +1,147 @@ +/* + * Freescale STMP37XX SoC pin multiplexing + * + * Author: Vladislav Buzov + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_ARCH_PINS_H +#define __ASM_ARCH_PINS_H + +/* + * Define all STMP37XX pins, a pin name corresponds to a STMP37xx hardware + * interface this pin belongs to. + */ + +/* Bank 0 */ +#define PINID_GPMI_D00 STMP3XXX_PINID(0, 0) +#define PINID_GPMI_D01 STMP3XXX_PINID(0, 1) +#define PINID_GPMI_D02 STMP3XXX_PINID(0, 2) +#define PINID_GPMI_D03 STMP3XXX_PINID(0, 3) +#define PINID_GPMI_D04 STMP3XXX_PINID(0, 4) +#define PINID_GPMI_D05 STMP3XXX_PINID(0, 5) +#define PINID_GPMI_D06 STMP3XXX_PINID(0, 6) +#define PINID_GPMI_D07 STMP3XXX_PINID(0, 7) +#define PINID_GPMI_D08 STMP3XXX_PINID(0, 8) +#define PINID_GPMI_D09 STMP3XXX_PINID(0, 9) +#define PINID_GPMI_D10 STMP3XXX_PINID(0, 10) +#define PINID_GPMI_D11 STMP3XXX_PINID(0, 11) +#define PINID_GPMI_D12 STMP3XXX_PINID(0, 12) +#define PINID_GPMI_D13 STMP3XXX_PINID(0, 13) +#define PINID_GPMI_D14 STMP3XXX_PINID(0, 14) +#define PINID_GPMI_D15 STMP3XXX_PINID(0, 15) +#define PINID_GPMI_A0 STMP3XXX_PINID(0, 16) +#define PINID_GPMI_A1 STMP3XXX_PINID(0, 17) +#define PINID_GPMI_A2 STMP3XXX_PINID(0, 18) +#define PINID_GPMI_RDY0 STMP3XXX_PINID(0, 19) +#define PINID_GPMI_RDY2 STMP3XXX_PINID(0, 20) +#define PINID_GPMI_RDY3 STMP3XXX_PINID(0, 21) +#define PINID_GPMI_RESETN STMP3XXX_PINID(0, 22) +#define PINID_GPMI_IRQ STMP3XXX_PINID(0, 23) +#define PINID_GPMI_WRN STMP3XXX_PINID(0, 24) +#define PINID_GPMI_RDN STMP3XXX_PINID(0, 25) +#define PINID_UART2_CTS STMP3XXX_PINID(0, 26) +#define PINID_UART2_RTS STMP3XXX_PINID(0, 27) +#define PINID_UART2_RX STMP3XXX_PINID(0, 28) +#define PINID_UART2_TX STMP3XXX_PINID(0, 29) + +/* Bank 1 */ +#define PINID_LCD_D00 STMP3XXX_PINID(1, 0) +#define PINID_LCD_D01 STMP3XXX_PINID(1, 1) +#define PINID_LCD_D02 STMP3XXX_PINID(1, 2) +#define PINID_LCD_D03 STMP3XXX_PINID(1, 3) +#define PINID_LCD_D04 STMP3XXX_PINID(1, 4) +#define PINID_LCD_D05 STMP3XXX_PINID(1, 5) +#define PINID_LCD_D06 STMP3XXX_PINID(1, 6) +#define PINID_LCD_D07 STMP3XXX_PINID(1, 7) +#define PINID_LCD_D08 STMP3XXX_PINID(1, 8) +#define PINID_LCD_D09 STMP3XXX_PINID(1, 9) +#define PINID_LCD_D10 STMP3XXX_PINID(1, 10) +#define PINID_LCD_D11 STMP3XXX_PINID(1, 11) +#define PINID_LCD_D12 STMP3XXX_PINID(1, 12) +#define PINID_LCD_D13 STMP3XXX_PINID(1, 13) +#define PINID_LCD_D14 STMP3XXX_PINID(1, 14) +#define PINID_LCD_D15 STMP3XXX_PINID(1, 15) +#define PINID_LCD_RESET STMP3XXX_PINID(1, 16) +#define PINID_LCD_RS STMP3XXX_PINID(1, 17) +#define PINID_LCD_WR_RWN STMP3XXX_PINID(1, 18) +#define PINID_LCD_RD_E STMP3XXX_PINID(1, 19) +#define PINID_LCD_CS STMP3XXX_PINID(1, 20) +#define PINID_LCD_BUSY STMP3XXX_PINID(1, 21) +#define PINID_SSP1_CMD STMP3XXX_PINID(1, 22) +#define PINID_SSP1_SCK STMP3XXX_PINID(1, 23) +#define PINID_SSP1_DATA0 STMP3XXX_PINID(1, 24) +#define PINID_SSP1_DATA1 STMP3XXX_PINID(1, 25) +#define PINID_SSP1_DATA2 STMP3XXX_PINID(1, 26) +#define PINID_SSP1_DATA3 STMP3XXX_PINID(1, 27) +#define PINID_SSP1_DETECT STMP3XXX_PINID(1, 28) + +/* Bank 2 */ +#define PINID_PWM0 STMP3XXX_PINID(2, 0) +#define PINID_PWM1 STMP3XXX_PINID(2, 1) +#define PINID_PWM2 STMP3XXX_PINID(2, 2) +#define PINID_PWM3 STMP3XXX_PINID(2, 3) +#define PINID_PWM4 STMP3XXX_PINID(2, 4) +#define PINID_I2C_SCL STMP3XXX_PINID(2, 5) +#define PINID_I2C_SDA STMP3XXX_PINID(2, 6) +#define PINID_ROTTARYA STMP3XXX_PINID(2, 7) +#define PINID_ROTTARYB STMP3XXX_PINID(2, 8) +#define PINID_EMI_CKE STMP3XXX_PINID(2, 9) +#define PINID_EMI_RASN STMP3XXX_PINID(2, 10) +#define PINID_EMI_CASN STMP3XXX_PINID(2, 11) +#define PINID_EMI_CE0N STMP3XXX_PINID(2, 12) +#define PINID_EMI_CE1N STMP3XXX_PINID(2, 13) +#define PINID_EMI_CE2N STMP3XXX_PINID(2, 14) +#define PINID_EMI_CE3N STMP3XXX_PINID(2, 15) +#define PINID_EMI_A00 STMP3XXX_PINID(2, 16) +#define PINID_EMI_A01 STMP3XXX_PINID(2, 17) +#define PINID_EMI_A02 STMP3XXX_PINID(2, 18) +#define PINID_EMI_A03 STMP3XXX_PINID(2, 19) +#define PINID_EMI_A04 STMP3XXX_PINID(2, 20) +#define PINID_EMI_A05 STMP3XXX_PINID(2, 21) +#define PINID_EMI_A06 STMP3XXX_PINID(2, 22) +#define PINID_EMI_A07 STMP3XXX_PINID(2, 23) +#define PINID_EMI_A08 STMP3XXX_PINID(2, 24) +#define PINID_EMI_A09 STMP3XXX_PINID(2, 25) +#define PINID_EMI_A10 STMP3XXX_PINID(2, 26) +#define PINID_EMI_A11 STMP3XXX_PINID(2, 27) +#define PINID_EMI_A12 STMP3XXX_PINID(2, 28) +#define PINID_EMI_A13 STMP3XXX_PINID(2, 29) +#define PINID_EMI_A14 STMP3XXX_PINID(2, 30) +#define PINID_EMI_WEN STMP3XXX_PINID(2, 31) + +/* Bank 3 */ +#define PINID_EMI_D00 STMP3XXX_PINID(3, 0) +#define PINID_EMI_D01 STMP3XXX_PINID(3, 1) +#define PINID_EMI_D02 STMP3XXX_PINID(3, 2) +#define PINID_EMI_D03 STMP3XXX_PINID(3, 3) +#define PINID_EMI_D04 STMP3XXX_PINID(3, 4) +#define PINID_EMI_D05 STMP3XXX_PINID(3, 5) +#define PINID_EMI_D06 STMP3XXX_PINID(3, 6) +#define PINID_EMI_D07 STMP3XXX_PINID(3, 7) +#define PINID_EMI_D08 STMP3XXX_PINID(3, 8) +#define PINID_EMI_D09 STMP3XXX_PINID(3, 9) +#define PINID_EMI_D10 STMP3XXX_PINID(3, 10) +#define PINID_EMI_D11 STMP3XXX_PINID(3, 11) +#define PINID_EMI_D12 STMP3XXX_PINID(3, 12) +#define PINID_EMI_D13 STMP3XXX_PINID(3, 13) +#define PINID_EMI_D14 STMP3XXX_PINID(3, 14) +#define PINID_EMI_D15 STMP3XXX_PINID(3, 15) +#define PINID_EMI_DQS0 STMP3XXX_PINID(3, 16) +#define PINID_EMI_DQS1 STMP3XXX_PINID(3, 17) +#define PINID_EMI_DQM0 STMP3XXX_PINID(3, 18) +#define PINID_EMI_DQM1 STMP3XXX_PINID(3, 19) +#define PINID_EMI_CLK STMP3XXX_PINID(3, 20) +#define PINID_EMI_CLKN STMP3XXX_PINID(3, 21) + +#endif /* __ASM_ARCH_PINS_H */ -- cgit v0.10.2 From b4175b89921fefb2f352472fa6dccb0fc4fb37d9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 26 Apr 2009 14:22:29 +0100 Subject: [ARM] sort machine- and plat- by CONFIG* name Signed-off-by: Russell King diff --git a/arch/arm/Makefile b/arch/arm/Makefile index e84729b..885a837 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -99,64 +99,69 @@ CHECKFLAGS += -D__arm__ #Default value head-y := arch/arm/kernel/head$(MMUEXT).o arch/arm/kernel/init_task.o textofs-y := 0x00008000 - - machine-$(CONFIG_ARCH_RPC) := rpc - machine-$(CONFIG_ARCH_EBSA110) := ebsa110 - machine-$(CONFIG_FOOTBRIDGE) := footbridge - machine-$(CONFIG_ARCH_SHARK) := shark - machine-$(CONFIG_ARCH_SA1100) := sa1100 -ifeq ($(CONFIG_ARCH_SA1100),y) +textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000 # SA1111 DMA bug: we don't want the kernel to live in precious DMA-able memory - textofs-$(CONFIG_SA1111) := 0x00208000 +ifeq ($(CONFIG_ARCH_SA1100),y) +textofs-$(CONFIG_SA1111) := 0x00208000 endif - machine-$(CONFIG_ARCH_PXA) := pxa - machine-$(CONFIG_ARCH_MMP) := mmp - plat-$(CONFIG_PLAT_PXA) := pxa - machine-$(CONFIG_ARCH_L7200) := l7200 - machine-$(CONFIG_ARCH_INTEGRATOR) := integrator - machine-$(CONFIG_ARCH_GEMINI) := gemini - textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000 - machine-$(CONFIG_ARCH_CLPS711X) := clps711x - machine-$(CONFIG_ARCH_IOP32X) := iop32x - machine-$(CONFIG_ARCH_IOP33X) := iop33x - machine-$(CONFIG_ARCH_IOP13XX) := iop13xx - plat-$(CONFIG_PLAT_IOP) := iop - machine-$(CONFIG_ARCH_IXP4XX) := ixp4xx - machine-$(CONFIG_ARCH_IXP2000) := ixp2000 - machine-$(CONFIG_ARCH_IXP23XX) := ixp23xx - machine-$(CONFIG_ARCH_OMAP1) := omap1 - machine-$(CONFIG_ARCH_OMAP2) := omap2 - machine-$(CONFIG_ARCH_OMAP3) := omap2 - plat-$(CONFIG_ARCH_OMAP) := omap - machine-$(CONFIG_ARCH_S3C2410) := s3c2410 s3c2400 s3c2412 s3c2440 s3c2442 s3c2443 - machine-$(CONFIG_ARCH_S3C24A0) := s3c24a0 - plat-$(CONFIG_PLAT_S3C24XX) := s3c24xx s3c - machine-$(CONFIG_ARCH_S3C64XX) := s3c6400 s3c6410 - plat-$(CONFIG_PLAT_S3C64XX) := s3c64xx s3c - machine-$(CONFIG_ARCH_LH7A40X) := lh7a40x - machine-$(CONFIG_ARCH_VERSATILE) := versatile - machine-$(CONFIG_ARCH_IMX) := imx - machine-$(CONFIG_ARCH_H720X) := h720x - machine-$(CONFIG_ARCH_AAEC2000) := aaec2000 - machine-$(CONFIG_ARCH_REALVIEW) := realview - machine-$(CONFIG_ARCH_AT91) := at91 - machine-$(CONFIG_ARCH_EP93XX) := ep93xx - machine-$(CONFIG_ARCH_PNX4008) := pnx4008 - machine-$(CONFIG_ARCH_NETX) := netx - machine-$(CONFIG_ARCH_NS9XXX) := ns9xxx - machine-$(CONFIG_ARCH_DAVINCI) := davinci - machine-$(CONFIG_ARCH_KIRKWOOD) := kirkwood - machine-$(CONFIG_ARCH_KS8695) := ks8695 - plat-$(CONFIG_ARCH_MXC) := mxc - machine-$(CONFIG_ARCH_MX2) := mx2 - machine-$(CONFIG_ARCH_MX3) := mx3 - machine-$(CONFIG_ARCH_MX1) := mx1 - machine-$(CONFIG_ARCH_ORION5X) := orion5x - plat-$(CONFIG_PLAT_ORION) := orion - machine-$(CONFIG_ARCH_MSM) := msm - machine-$(CONFIG_ARCH_LOKI) := loki - machine-$(CONFIG_ARCH_MV78XX0) := mv78xx0 - machine-$(CONFIG_ARCH_W90X900) := w90x900 + +# Machine directory name. This list is sorted alphanumerically +# by CONFIG_* macro name. +machine-$(CONFIG_ARCH_AAEC2000) := aaec2000 +machine-$(CONFIG_ARCH_AT91) := at91 +machine-$(CONFIG_ARCH_CLPS711X) := clps711x +machine-$(CONFIG_ARCH_DAVINCI) := davinci +machine-$(CONFIG_ARCH_EBSA110) := ebsa110 +machine-$(CONFIG_ARCH_EP93XX) := ep93xx +machine-$(CONFIG_ARCH_GEMINI) := gemini +machine-$(CONFIG_ARCH_H720X) := h720x +machine-$(CONFIG_ARCH_IMX) := imx +machine-$(CONFIG_ARCH_INTEGRATOR) := integrator +machine-$(CONFIG_ARCH_IOP13XX) := iop13xx +machine-$(CONFIG_ARCH_IOP32X) := iop32x +machine-$(CONFIG_ARCH_IOP33X) := iop33x +machine-$(CONFIG_ARCH_IXP2000) := ixp2000 +machine-$(CONFIG_ARCH_IXP23XX) := ixp23xx +machine-$(CONFIG_ARCH_IXP4XX) := ixp4xx +machine-$(CONFIG_ARCH_KIRKWOOD) := kirkwood +machine-$(CONFIG_ARCH_KS8695) := ks8695 +machine-$(CONFIG_ARCH_L7200) := l7200 +machine-$(CONFIG_ARCH_LH7A40X) := lh7a40x +machine-$(CONFIG_ARCH_LOKI) := loki +machine-$(CONFIG_ARCH_MMP) := mmp +machine-$(CONFIG_ARCH_MSM) := msm +machine-$(CONFIG_ARCH_MV78XX0) := mv78xx0 +machine-$(CONFIG_ARCH_MX1) := mx1 +machine-$(CONFIG_ARCH_MX2) := mx2 +machine-$(CONFIG_ARCH_MX3) := mx3 +machine-$(CONFIG_ARCH_NETX) := netx +machine-$(CONFIG_ARCH_NS9XXX) := ns9xxx +machine-$(CONFIG_ARCH_OMAP1) := omap1 +machine-$(CONFIG_ARCH_OMAP2) := omap2 +machine-$(CONFIG_ARCH_OMAP3) := omap2 +machine-$(CONFIG_ARCH_ORION5X) := orion5x +machine-$(CONFIG_ARCH_PNX4008) := pnx4008 +machine-$(CONFIG_ARCH_PXA) := pxa +machine-$(CONFIG_ARCH_REALVIEW) := realview +machine-$(CONFIG_ARCH_RPC) := rpc +machine-$(CONFIG_ARCH_S3C2410) := s3c2410 s3c2400 s3c2412 s3c2440 s3c2442 s3c2443 +machine-$(CONFIG_ARCH_S3C24A0) := s3c24a0 +machine-$(CONFIG_ARCH_S3C64XX) := s3c6400 s3c6410 +machine-$(CONFIG_ARCH_SA1100) := sa1100 +machine-$(CONFIG_ARCH_SHARK) := shark +machine-$(CONFIG_ARCH_VERSATILE) := versatile +machine-$(CONFIG_ARCH_W90X900) := w90x900 +machine-$(CONFIG_FOOTBRIDGE) := footbridge + +# Platform directory name. This list is sorted alphanumerically +# by CONFIG_* macro name. +plat-$(CONFIG_ARCH_MXC) := mxc +plat-$(CONFIG_ARCH_OMAP) := omap +plat-$(CONFIG_PLAT_IOP) := iop +plat-$(CONFIG_PLAT_ORION) := orion +plat-$(CONFIG_PLAT_PXA) := pxa +plat-$(CONFIG_PLAT_S3C24XX) := s3c24xx s3c +plat-$(CONFIG_PLAT_S3C64XX) := s3c64xx s3c ifeq ($(CONFIG_ARCH_EBSA110),y) # This is what happens if you forget the IOCS16 line. -- cgit v0.10.2 From 1e3dd535d641a856e913dd8a17a75bd3c36c49e0 Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Wed, 22 Apr 2009 23:52:45 +0100 Subject: [ARM] 5469/1: Freescale STMP platform support [2/10] Headers for STMP378x boards Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/mach-stmp378x/include/mach/entry-macro.S b/arch/arm/mach-stmp378x/include/mach/entry-macro.S new file mode 100644 index 0000000..731a922 --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/entry-macro.S @@ -0,0 +1,35 @@ +/* + * Low-level IRQ helper macros for Freescale STMP378X + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + + .macro disable_fiq + .endm + + .macro get_irqnr_and_base, irqnr, irqstat, base, tmp + + mov \base, #0xf0000000 @ vm address of IRQ controller + ldr \irqnr, [\base, #0x70] @ HW_ICOLL_STAT + cmp \irqnr, #0x7f + moveqs \irqnr, #0 @ Zero flag set for no IRQ + + .endm + + .macro get_irqnr_preamble, base, tmp + .endm + + .macro arch_ret_to_user, tmp1, tmp2 + .endm diff --git a/arch/arm/mach-stmp378x/include/mach/irqs.h b/arch/arm/mach-stmp378x/include/mach/irqs.h new file mode 100644 index 0000000..cc59673 --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/irqs.h @@ -0,0 +1,95 @@ +/* + * Freescale STMP378X interrupts + * + * Copyright (C) 2005 Sigmatel Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#define IRQ_DEBUG_UART 0 +#define IRQ_COMMS_RX 1 +#define IRQ_COMMS_TX 1 +#define IRQ_SSP2_ERROR 2 +#define IRQ_VDD5V 3 +#define IRQ_HEADPHONE_SHORT 4 +#define IRQ_DAC_DMA 5 +#define IRQ_DAC_ERROR 6 +#define IRQ_ADC_DMA 7 +#define IRQ_ADC_ERROR 8 +#define IRQ_SPDIF_DMA 9 +#define IRQ_SAIF2_DMA 9 +#define IRQ_SPDIF_ERROR 10 +#define IRQ_SAIF1_IRQ 10 +#define IRQ_SAIF2_IRQ 10 +#define IRQ_USB_CTRL 11 +#define IRQ_USB_WAKEUP 12 +#define IRQ_GPMI_DMA 13 +#define IRQ_SSP1_DMA 14 +#define IRQ_SSP_ERROR 15 +#define IRQ_GPIO0 16 +#define IRQ_GPIO1 17 +#define IRQ_GPIO2 18 +#define IRQ_SAIF1_DMA 19 +#define IRQ_SSP2_DMA 20 +#define IRQ_ECC8_IRQ 21 +#define IRQ_RTC_ALARM 22 +#define IRQ_UARTAPP_TX_DMA 23 +#define IRQ_UARTAPP_INTERNAL 24 +#define IRQ_UARTAPP_RX_DMA 25 +#define IRQ_I2C_DMA 26 +#define IRQ_I2C_ERROR 27 +#define IRQ_TIMER0 28 +#define IRQ_TIMER1 29 +#define IRQ_TIMER2 30 +#define IRQ_TIMER3 31 +#define IRQ_BATT_BRNOUT 32 +#define IRQ_VDDD_BRNOUT 33 +#define IRQ_VDDIO_BRNOUT 34 +#define IRQ_VDD18_BRNOUT 35 +#define IRQ_TOUCH_DETECT 36 +#define IRQ_LRADC_CH0 37 +#define IRQ_LRADC_CH1 38 +#define IRQ_LRADC_CH2 39 +#define IRQ_LRADC_CH3 40 +#define IRQ_LRADC_CH4 41 +#define IRQ_LRADC_CH5 42 +#define IRQ_LRADC_CH6 43 +#define IRQ_LRADC_CH7 44 +#define IRQ_LCDIF_DMA 45 +#define IRQ_LCDIF_ERROR 46 +#define IRQ_DIGCTL_DEBUG_TRAP 47 +#define IRQ_RTC_1MSEC 48 +#define IRQ_DRI_DMA 49 +#define IRQ_DRI_ATTENTION 50 +#define IRQ_GPMI_ATTENTION 51 +#define IRQ_IR 52 +#define IRQ_DCP_VMI 53 +#define IRQ_DCP 54 +#define IRQ_BCH 56 +#define IRQ_PXP 57 +#define IRQ_UARTAPP2_TX_DMA 58 +#define IRQ_UARTAPP2_INTERNAL 59 +#define IRQ_UARTAPP2_RX_DMA 60 +#define IRQ_VDAC_DETECT 61 +#define IRQ_VDD5V_DROOP 64 +#define IRQ_DCDC4P2_BO 65 + + +#define NR_REAL_IRQS 128 +#define NR_IRQS (NR_REAL_IRQS + 32 * 3) + +/* All interrupts are FIQ capable */ +#define FIQ_START IRQ_DEBUG_UART + +/* Hard disk IRQ is a GPMI attention IRQ */ +#define IRQ_HARDDISK IRQ_GPMI_ATTENTION diff --git a/arch/arm/mach-stmp378x/include/mach/pins.h b/arch/arm/mach-stmp378x/include/mach/pins.h new file mode 100644 index 0000000..93f952d --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/pins.h @@ -0,0 +1,151 @@ +/* + * Freescale STMP378X SoC pin multiplexing + * + * Author: Vladislav Buzov + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_ARCH_PINS_H +#define __ASM_ARCH_PINS_H + +/* + * Define all STMP378x pins, a pin name corresponds to a STMP378x hardware + * interface this pin belongs to. + */ + +/* Bank 0 */ +#define PINID_GPMI_D00 STMP3XXX_PINID(0, 0) +#define PINID_GPMI_D01 STMP3XXX_PINID(0, 1) +#define PINID_GPMI_D02 STMP3XXX_PINID(0, 2) +#define PINID_GPMI_D03 STMP3XXX_PINID(0, 3) +#define PINID_GPMI_D04 STMP3XXX_PINID(0, 4) +#define PINID_GPMI_D05 STMP3XXX_PINID(0, 5) +#define PINID_GPMI_D06 STMP3XXX_PINID(0, 6) +#define PINID_GPMI_D07 STMP3XXX_PINID(0, 7) +#define PINID_GPMI_D08 STMP3XXX_PINID(0, 8) +#define PINID_GPMI_D09 STMP3XXX_PINID(0, 9) +#define PINID_GPMI_D10 STMP3XXX_PINID(0, 10) +#define PINID_GPMI_D11 STMP3XXX_PINID(0, 11) +#define PINID_GPMI_D12 STMP3XXX_PINID(0, 12) +#define PINID_GPMI_D13 STMP3XXX_PINID(0, 13) +#define PINID_GPMI_D14 STMP3XXX_PINID(0, 14) +#define PINID_GPMI_D15 STMP3XXX_PINID(0, 15) +#define PINID_GPMI_CLE STMP3XXX_PINID(0, 16) +#define PINID_GPMI_ALE STMP3XXX_PINID(0, 17) +#define PINID_GMPI_CE2N STMP3XXX_PINID(0, 18) +#define PINID_GPMI_RDY0 STMP3XXX_PINID(0, 19) +#define PINID_GPMI_RDY1 STMP3XXX_PINID(0, 20) +#define PINID_GPMI_RDY2 STMP3XXX_PINID(0, 21) +#define PINID_GPMI_RDY3 STMP3XXX_PINID(0, 22) +#define PINID_GPMI_WPN STMP3XXX_PINID(0, 23) +#define PINID_GPMI_WRN STMP3XXX_PINID(0, 24) +#define PINID_GPMI_RDN STMP3XXX_PINID(0, 25) +#define PINID_AUART1_CTS STMP3XXX_PINID(0, 26) +#define PINID_AUART1_RTS STMP3XXX_PINID(0, 27) +#define PINID_AUART1_RX STMP3XXX_PINID(0, 28) +#define PINID_AUART1_TX STMP3XXX_PINID(0, 29) +#define PINID_I2C_SCL STMP3XXX_PINID(0, 30) +#define PINID_I2C_SDA STMP3XXX_PINID(0, 31) + +/* Bank 1 */ +#define PINID_LCD_D00 STMP3XXX_PINID(1, 0) +#define PINID_LCD_D01 STMP3XXX_PINID(1, 1) +#define PINID_LCD_D02 STMP3XXX_PINID(1, 2) +#define PINID_LCD_D03 STMP3XXX_PINID(1, 3) +#define PINID_LCD_D04 STMP3XXX_PINID(1, 4) +#define PINID_LCD_D05 STMP3XXX_PINID(1, 5) +#define PINID_LCD_D06 STMP3XXX_PINID(1, 6) +#define PINID_LCD_D07 STMP3XXX_PINID(1, 7) +#define PINID_LCD_D08 STMP3XXX_PINID(1, 8) +#define PINID_LCD_D09 STMP3XXX_PINID(1, 9) +#define PINID_LCD_D10 STMP3XXX_PINID(1, 10) +#define PINID_LCD_D11 STMP3XXX_PINID(1, 11) +#define PINID_LCD_D12 STMP3XXX_PINID(1, 12) +#define PINID_LCD_D13 STMP3XXX_PINID(1, 13) +#define PINID_LCD_D14 STMP3XXX_PINID(1, 14) +#define PINID_LCD_D15 STMP3XXX_PINID(1, 15) +#define PINID_LCD_D16 STMP3XXX_PINID(1, 16) +#define PINID_LCD_D17 STMP3XXX_PINID(1, 17) +#define PINID_LCD_RESET STMP3XXX_PINID(1, 18) +#define PINID_LCD_RS STMP3XXX_PINID(1, 19) +#define PINID_LCD_WR STMP3XXX_PINID(1, 20) +#define PINID_LCD_CS STMP3XXX_PINID(1, 21) +#define PINID_LCD_DOTCK STMP3XXX_PINID(1, 22) +#define PINID_LCD_ENABLE STMP3XXX_PINID(1, 23) +#define PINID_LCD_HSYNC STMP3XXX_PINID(1, 24) +#define PINID_LCD_VSYNC STMP3XXX_PINID(1, 25) +#define PINID_PWM0 STMP3XXX_PINID(1, 26) +#define PINID_PWM1 STMP3XXX_PINID(1, 27) +#define PINID_PWM2 STMP3XXX_PINID(1, 28) +#define PINID_PWM3 STMP3XXX_PINID(1, 29) +#define PINID_PWM4 STMP3XXX_PINID(1, 30) + +/* Bank 2 */ +#define PINID_SSP1_CMD STMP3XXX_PINID(2, 0) +#define PINID_SSP1_DETECT STMP3XXX_PINID(2, 1) +#define PINID_SSP1_DATA0 STMP3XXX_PINID(2, 2) +#define PINID_SSP1_DATA1 STMP3XXX_PINID(2, 3) +#define PINID_SSP1_DATA2 STMP3XXX_PINID(2, 4) +#define PINID_SSP1_DATA3 STMP3XXX_PINID(2, 5) +#define PINID_SSP1_SCK STMP3XXX_PINID(2, 6) +#define PINID_ROTARYA STMP3XXX_PINID(2, 7) +#define PINID_ROTARYB STMP3XXX_PINID(2, 8) +#define PINID_EMI_A00 STMP3XXX_PINID(2, 9) +#define PINID_EMI_A01 STMP3XXX_PINID(2, 10) +#define PINID_EMI_A02 STMP3XXX_PINID(2, 11) +#define PINID_EMI_A03 STMP3XXX_PINID(2, 12) +#define PINID_EMI_A04 STMP3XXX_PINID(2, 13) +#define PINID_EMI_A05 STMP3XXX_PINID(2, 14) +#define PINID_EMI_A06 STMP3XXX_PINID(2, 15) +#define PINID_EMI_A07 STMP3XXX_PINID(2, 16) +#define PINID_EMI_A08 STMP3XXX_PINID(2, 17) +#define PINID_EMI_A09 STMP3XXX_PINID(2, 18) +#define PINID_EMI_A10 STMP3XXX_PINID(2, 19) +#define PINID_EMI_A11 STMP3XXX_PINID(2, 20) +#define PINID_EMI_A12 STMP3XXX_PINID(2, 21) +#define PINID_EMI_BA0 STMP3XXX_PINID(2, 22) +#define PINID_EMI_BA1 STMP3XXX_PINID(2, 23) +#define PINID_EMI_CASN STMP3XXX_PINID(2, 24) +#define PINID_EMI_CE0N STMP3XXX_PINID(2, 25) +#define PINID_EMI_CE1N STMP3XXX_PINID(2, 26) +#define PINID_GPMI_CE1N STMP3XXX_PINID(2, 27) +#define PINID_GPMI_CE0N STMP3XXX_PINID(2, 28) +#define PINID_EMI_CKE STMP3XXX_PINID(2, 29) +#define PINID_EMI_RASN STMP3XXX_PINID(2, 30) +#define PINID_EMI_WEN STMP3XXX_PINID(2, 31) + +/* Bank 3 */ +#define PINID_EMI_D00 STMP3XXX_PINID(3, 0) +#define PINID_EMI_D01 STMP3XXX_PINID(3, 1) +#define PINID_EMI_D02 STMP3XXX_PINID(3, 2) +#define PINID_EMI_D03 STMP3XXX_PINID(3, 3) +#define PINID_EMI_D04 STMP3XXX_PINID(3, 4) +#define PINID_EMI_D05 STMP3XXX_PINID(3, 5) +#define PINID_EMI_D06 STMP3XXX_PINID(3, 6) +#define PINID_EMI_D07 STMP3XXX_PINID(3, 7) +#define PINID_EMI_D08 STMP3XXX_PINID(3, 8) +#define PINID_EMI_D09 STMP3XXX_PINID(3, 9) +#define PINID_EMI_D10 STMP3XXX_PINID(3, 10) +#define PINID_EMI_D11 STMP3XXX_PINID(3, 11) +#define PINID_EMI_D12 STMP3XXX_PINID(3, 12) +#define PINID_EMI_D13 STMP3XXX_PINID(3, 13) +#define PINID_EMI_D14 STMP3XXX_PINID(3, 14) +#define PINID_EMI_D15 STMP3XXX_PINID(3, 15) +#define PINID_EMI_DQM0 STMP3XXX_PINID(3, 16) +#define PINID_EMI_DQM1 STMP3XXX_PINID(3, 17) +#define PINID_EMI_DQS0 STMP3XXX_PINID(3, 18) +#define PINID_EMI_DQS1 STMP3XXX_PINID(3, 19) +#define PINID_EMI_CLK STMP3XXX_PINID(3, 20) +#define PINID_EMI_CLKN STMP3XXX_PINID(3, 21) + +#endif /* __ASM_ARCH_PINS_H */ -- cgit v0.10.2 From 34acb09025a132943555d0f0ffca6cb05c698cd4 Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Wed, 22 Apr 2009 23:54:05 +0100 Subject: [ARM] 5468/1: Freescale STMP platform support [3/10] Minimal definition of register set for 37xx boards Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h b/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h new file mode 100644 index 0000000..3044c20 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/regs-apbh.h @@ -0,0 +1,102 @@ +/* + * STMP APBH Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _INCLUDE_ASM_ARCH_REGS_APBH_H +#define _INCLUDE_ASM_ARCH_REGS_APBH_H + +#include + +#ifndef REGS_APBH_BASE +#define REGS_APBH_BASE (REGS_BASE + 0x00004000) +#endif + +HW_REGISTER(HW_APBH_CTRL0, REGS_APBH_BASE, 0x00) +#define BP_APBH_CTRL0_SFTRST 31 +#define BM_APBH_CTRL0_SFTRST 0x80000000 +#define BP_APBH_CTRL0_CLKGATE 30 +#define BM_APBH_CTRL0_CLKGATE 0x40000000 +#define BP_APBH_CTRL0_RESET_CHANNEL 16 +#define BM_APBH_CTRL0_RESET_CHANNEL 0x00FF0000 +#define BF_APBH_CTRL0_RESET_CHANNEL(v) \ + (((v) << BP_APBH_CTRL0_RESET_CHANNEL) & BM_APBH_CTRL0_RESET_CHANNEL) +HW_REGISTER(HW_APBH_CTRL1, REGS_APBH_BASE, 0x10) +#define BP_APBH_CTRL1_CH1_CMDCMPLT_IRQ_EN 9 +#define BM_APBH_CTRL1_CH1_CMDCMPLT_IRQ_EN 0x00000200 +#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ_EN 8 +#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ_EN 0x00000100 +#define BP_APBH_CTRL1_CH7_CMDCMPLT_IRQ 7 +#define BM_APBH_CTRL1_CH7_CMDCMPLT_IRQ 0x00000080 +#define BP_APBH_CTRL1_CH1_CMDCMPLT_IRQ 1 +#define BM_APBH_CTRL1_CH1_CMDCMPLT_IRQ 0x00000002 +#define BP_APBH_CTRL1_CH0_CMDCMPLT_IRQ 0 +#define BM_APBH_CTRL1_CH0_CMDCMPLT_IRQ 0x00000001 +#define BP_APBH_CTRL1_CH1_ERR_IRQ 17 +#define BM_APBH_CTRL1_CH1_ERR_IRQ 0x00020000 +HW_REGISTER_0(HW_APBH_DEVSEL, REGS_APBH_BASE, 0x20) +HW_REGISTER_RO_INDEXED(HW_APBH_CHn_CURCMDAR, REGS_APBH_BASE, 0x40, 0x70) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_NXTCMDAR, REGS_APBH_BASE, 0x50, 0x70) +#define BP_APBH_CHn_NXTCMDAR_CMD_ADDR 0 +#define BM_APBH_CHn_NXTCMDAR_CMD_ADDR 0xFFFFFFFF +#define BF_APBH_CHn_NXTCMDAR_CMD_ADDR(v) ((u32) v) +HW_REGISTER_RO_INDEXED(HW_APBH_CHn_CMD, REGS_APBH_BASE, 0x60, 0x70) +#define BM_APBH_CHn_CMD_XFER_COUNT 0xFFFF0000 +#define BP_APBH_CHn_CMD_XFER_COUNT 16 +#define BF_APBH_CHn_CMD_XFER_COUNT(v) \ + (((v) << BP_APBH_CHn_CMD_XFER_COUNT) & BM_APBH_CHn_CMD_XFER_COUNT) +#define BM_APBH_CHn_CMD_CMDWORDS 0x0000F000 +#define BP_APBH_CHn_CMD_CMDWORDS 12 +#define BF_APBH_CHn_CMD_CMDWORDS(v) \ + (((v) << BP_APBH_CHn_CMD_CMDWORDS) & BM_APBH_CHn_CMD_CMDWORDS) +#define BM_APBH_CHn_CMD_WAIT4ENDCMD 0x00000080 +#define BM_APBH_CHn_CMD_SEMAPHORE 0x00000040 +#define BP_APBH_CHn_CMD_SEMAPHORE 6 +#define BF_APBH_CHn_CMD_SEMAPHORE(v) \ + (((v) << BP_APBH_CHn_CMD_SEMAPHORE) & BM_APBH_CHn_CMD_SEMAPHORE) +#define BM_APBH_CHn_CMD_NANDWAIT4READY 0x00000020 +#define BP_APBH_CHn_CMD_NANDLOCK 4 +#define BM_APBH_CHn_CMD_NANDLOCK 0x00000010 +#define BF_APBH_CHn_CMD_NANDLOCK(v) \ + (((v) << BP_APBH_CHn_CMD_NANDLOCK) & BM_APBH_CHn_CMD_NANDLOCK) +#define BM_APBH_CHn_CMD_IRQONCMPLT 0x00000008 +#define BM_APBH_CHn_CMD_CHAIN 0x00000004 +#define BM_APBH_CHn_CMD_DMA_READ 0x00000003 +#define BP_APBH_CHn_CMD_DMA_READ 0 +#define BF_APBH_CHn_CMD_DMA_READ(v) \ + (((v) << BP_APBH_CHn_CMD_DMA_READ) & BM_APBH_CHn_CMD_DMA_READ) +#define BF_APBH_CHn_CMD_COMMAND(v) \ + (((v) << BP_APBH_CHn_CMD_DMA_READ) & BM_APBH_CHn_CMD_DMA_READ) +#define BV_APBH_CHn_CMD_COMMAND__NO_DMA_XFER 0x0 +#define BV_APBH_CHn_CMD_COMMAND__DMA_WRITE 0x1 +#define BV_APBH_CHn_CMD_COMMAND__DMA_READ 0x2 +#define BV_APBH_CHn_CMD_COMMAND__DMA_SENSE 0x3 +HW_REGISTER_INDEXED(HW_APBH_CHn_BAR, REGS_APBH_BASE, 0x70, 0x70) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_SEMA, REGS_APBH_BASE, 0x80, 0x70) +#define BP_APBH_CHn_SEMA_INCREMENT_SEMA 0 +#define BM_APBH_CHn_SEMA_INCREMENT_SEMA 0x000000FF +#define BF_APBH_CHn_SEMA_INCREMENT_SEMA(v) \ + (((v) << BP_APBH_CHn_SEMA_INCREMENT_SEMA) & \ + BM_APBH_CHn_SEMA_INCREMENT_SEMA) +#define BP_APBH_CHn_SEMA_PHORE 16 +#define BM_APBH_CHn_SEMA_PHORE 0x00FF0000 +HW_REGISTER_RO_INDEXED(HW_APBH_CHn_DEBUG1, REGS_APBH_BASE, 0x90, 0x70) +HW_REGISTER_RO_INDEXED(HW_APBH_CHn_DEBUG2, REGS_APBH_BASE, 0xA0, 0x70) +HW_REGISTER_RO(HW_APBH_VERSION, REGS_APBH_BASE, 0x3F0) + +#endif /* _INCLUDE_ASM_ARCH_REGS_APBH_H */ diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h b/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h new file mode 100644 index 0000000..a14ddb9 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/regs-apbx.h @@ -0,0 +1,109 @@ +/* + * STMP APBX Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _INCLUDE_ASM_ARCH_REGS_APBX_H +#define _INCLUDE_ASM_ARCH_REGS_APBX_H + +#include + +#ifndef REGS_APBX_BASE +#define REGS_APBX_BASE (REGS_BASE + 0x00024000) +#endif + +HW_REGISTER(HW_APBX_CTRL0, REGS_APBX_BASE, 0x00) +#define BP_APBX_CTRL0_SFTRST 31 +#define BM_APBX_CTRL0_SFTRST 0x80000000 +#define BP_APBX_CTRL0_CLKGATE 30 +#define BM_APBX_CTRL0_CLKGATE 0x40000000 +#define BP_APBX_CTRL0_RESET_CHANNEL 16 +#define BM_APBX_CTRL0_RESET_CHANNEL 0x00FF0000 +#define BF_APBX_CTRL0_RESET_CHANNEL(v) \ + (((v) << BP_APBX_CTRL0_RESET_CHANNEL) & BM_APBX_CTRL0_RESET_CHANNEL) +HW_REGISTER(HW_APBX_CTRL1, REGS_APBX_BASE, 0x10) +HW_REGISTER_0(HW_APBX_DEVSEL, REGS_APBX_BASE, 0x20) +#define BP_APBX_DEVSEL_CH7 28 +#define BM_APBX_DEVSEL_CH7 0xF0000000 +#define BF_APBX_DEVSEL_CH7(v) \ + (((v) << BP_APBX_DEVSEL_CH7) & BM_APBX_DEVSEL_CH7) +#define BV_APBX_DEVSEL_CH7__USE_UART 0x0 +#define BV_APBX_DEVSEL_CH7__USE_IRDA 0x1 +#define BP_APBX_DEVSEL_CH6 24 +#define BM_APBX_DEVSEL_CH6 0x0F000000 +#define BF_APBX_DEVSEL_CH6(v) \ + (((v) << BP_APBX_DEVSEL_CH6) & BM_APBX_DEVSEL_CH6) +#define BV_APBX_DEVSEL_CH6__USE_UART 0x0 +#define BV_APBX_DEVSEL_CH6__USE_IRDA 0x1 +#define BP_APBX_CTRL1_CH7_AHB_ERROR_IRQ 23 +#define BM_APBX_CTRL1_CH7_AHB_ERROR_IRQ 0x00800000 +#define BP_APBX_CTRL1_CH6_AHB_ERROR_IRQ 22 +#define BM_APBX_CTRL1_CH6_AHB_ERROR_IRQ 0x00400000 +#define BP_APBX_CTRL1_CH7_CMDCMPLT_IRQ_EN 15 +#define BM_APBX_CTRL1_CH7_CMDCMPLT_IRQ_EN 0x00008000 +#define BP_APBX_CTRL1_CH6_CMDCMPLT_IRQ_EN 14 +#define BM_APBX_CTRL1_CH6_CMDCMPLT_IRQ_EN 0x00004000 + +HW_REGISTER_RO_INDEXED(HW_APBX_CHn_CURCMDAR, REGS_APBX_BASE, 0x40, 0x70) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_NXTCMDAR, REGS_APBX_BASE, 0x50, 0x70) +#define BP_APBX_CHn_NXTCMDAR_CMD_ADDR 0 +#define BM_APBX_CHn_NXTCMDAR_CMD_ADDR 0xFFFFFFFF +#define BF_APBX_CHn_NXTCMDAR_CMD_ADDR(v) ((u32) v) +HW_REGISTER_RO_INDEXED(HW_APBX_CHn_CMD, REGS_APBX_BASE, 0x60, 0x70) +#define BP_APBX_CHn_CMD_XFER_COUNT 16 +#define BM_APBX_CHn_CMD_XFER_COUNT 0xFFFF0000 +#define BF_APBX_CHn_CMD_XFER_COUNT(v) \ + (((v) << BP_APBX_CHn_CMD_XFER_COUNT) & BM_APBX_CHn_CMD_XFER_COUNT) +#define BP_APBX_CHn_CMD_CMDWORDS 12 +#define BM_APBX_CHn_CMD_CMDWORDS 0x0000F000 +#define BF_APBX_CHn_CMD_CMDWORDS(v) \ + (((v) << BP_APBX_CHn_CMD_CMDWORDS) & BM_APBX_CHn_CMD_CMDWORDS) +#define BP_APBX_CHn_CMD_WAIT4ENDCMD 7 +#define BM_APBX_CHn_CMD_WAIT4ENDCMD 0x00000080 +#define BP_APBX_CHn_CMD_SEMAPHORE 6 +#define BM_APBX_CHn_CMD_SEMAPHORE 0x00000040 +#define BP_APBX_CHn_CMD_IRQONCMPLT 3 +#define BM_APBX_CHn_CMD_IRQONCMPLT 0x00000008 +#define BP_APBX_CHn_CMD_CHAIN 2 +#define BM_APBX_CHn_CMD_CHAIN 0x00000004 +#define BM_APBX_CHn_CMD_DMA_READ 0x00000003 +#define BP_APBX_CHn_CMD_DMA_READ 0 +#define BF_APBX_CHn_CMD_DMA_READ(v) \ + (((v) << BP_APBX_CHn_CMD_DMA_READ) & BM_APBX_CHn_CMD_DMA_READ) +#define BP_APBX_CHn_CMD_COMMAND 0 +#define BM_APBX_CHn_CMD_COMMAND 0x00000003 +#define BF_APBX_CHn_CMD_COMMAND(v) \ + (((v) << BP_APBX_CHn_CMD_COMMAND) & BM_APBX_CHn_CMD_COMMAND) +#define BV_APBX_CHn_CMD_COMMAND__NO_DMA_XFER 0x0 +#define BV_APBX_CHn_CMD_COMMAND__DMA_WRITE 0x1 +#define BV_APBX_CHn_CMD_COMMAND__DMA_READ 0x2 + +HW_REGISTER_RO_INDEXED(HW_APBX_CHn_BAR, REGS_APBX_BASE, 0x70, 0x70) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_SEMA, REGS_APBX_BASE, 0x80, 0x70) +#define BP_APBX_CHn_SEMA_INCREMENT_SEMA 0 +#define BM_APBX_CHn_SEMA_INCREMENT_SEMA 0x000000FF +#define BF_APBX_CHn_SEMA_INCREMENT_SEMA(v) \ + (((v) << BP_APBX_CHn_SEMA_INCREMENT_SEMA) & \ + BM_APBX_CHn_SEMA_INCREMENT_SEMA) +#define BP_APBX_CHn_SEMA_PHORE 16 +#define BM_APBX_CHn_SEMA_PHORE 0x00FF0000 +HW_REGISTER_RO_INDEXED(HW_APBX_CHn_DEBUG1, REGS_APBX_BASE, 0x90, 0x70) +HW_REGISTER_RO_INDEXED(HW_APBX_CHn_DEBUG2, REGS_APBX_BASE, 0xA0, 0x70) +HW_REGISTER_RO(HW_APBX_VERSION, REGS_APBX_BASE, 0x3F0) + +#endif /* _INCLUDE_ASM_ARCH_REGS_APBX_H */ diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h b/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h new file mode 100644 index 0000000..229ee75 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/regs-clkctrl.h @@ -0,0 +1,85 @@ +#ifndef _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H +#define _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H + +#include + +#define REGS_CLKCTRL_BASE (REGS_BASE + 0x00040000) + +#define HW_CLKCTRL_PLLCTRL0_ADDR (REGS_CLKCTRL_BASE + 0x00) +HW_REGISTER(HW_CLKCTRL_PLLCTRL0, REGS_CLKCTRL_BASE, 0x00) +#define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS 0x00040000 +#define HW_CLKCTRL_PLLCTRL1_ADDR (REGS_CLKCTRL_BASE + 0x10) +HW_REGISTER(HW_CLKCTRL_PLLCTRL1, REGS_CLKCTRL_BASE, 0x10) + +#define HW_CLKCTRL_CPU_ADDR (REGS_CLKCTRL_BASE + 0x20) +HW_REGISTER(HW_CLKCTRL_CPU, REGS_CLKCTRL_BASE, 0x20) +#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F +#define BF_CLKCTRL_CPU_DIV_CPU(v) \ + (((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU) + +#define HW_CLKCTRL_HBUS_ADDR (REGS_CLKCTRL_BASE + 0x30) +HW_REGISTER(HW_CLKCTRL_HBUS, REGS_CLKCTRL_BASE, 0x30) +#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 0 /* for compatitibility */ +#define BM_CLKCTRL_HBUS_DIV 0x0000001F +#define BF_CLKCTRL_HBUS_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_HBUS_DIV) +#define HW_CLKCTRL_XBUS_ADDR (REGS_CLKCTRL_BASE + 0x40) +HW_REGISTER(HW_CLKCTRL_XBUS, REGS_CLKCTRL_BASE, 0x40) +#define HW_CLKCTRL_XTAL_ADDR (REGS_CLKCTRL_BASE + 0x50) +HW_REGISTER(HW_CLKCTRL_XTAL, REGS_CLKCTRL_BASE, 0x50) +#define HW_CLKCTRL_PIX_ADDR (REGS_CLKCTRL_BASE + 0x60) +HW_REGISTER(HW_CLKCTRL_PIX, REGS_CLKCTRL_BASE, 0x60) +#define BM_CLKCTRL_PIX_CLKGATE 0x80000000 +#define BM_CLKCTRL_PIX_BUSY 0x20000000 +#define BM_CLKCTRL_PIX_DIV 0x00007FFF +#define BP_CLKCTRL_PIX_DIV 0 +#define BF_CLKCTRL_PIX_DIV(v) \ + (((v) << BP_CLKCTRL_PIX_DIV) & BM_CLKCTRL_PIX_DIV) +#define HW_CLKCTRL_SSP_ADDR (REGS_CLKCTRL_BASE + 0x70) +HW_REGISTER(HW_CLKCTRL_SSP, REGS_CLKCTRL_BASE, 0x70) +#define HW_CLKCTRL_GPMI_ADDR (REGS_CLKCTRL_BASE + 0x80) +HW_REGISTER(HW_CLKCTRL_GPMI, REGS_CLKCTRL_BASE, 0x80) +#define HW_CLKCTRL_SPDIF_ADDR (REGS_CLKCTRL_BASE + 0x90) +HW_REGISTER(HW_CLKCTRL_SPDIF, REGS_CLKCTRL_BASE, 0x90) +#define HW_CLKCTRL_EMI_ADDR (REGS_CLKCTRL_BASE + 0xA0) +HW_REGISTER(HW_CLKCTRL_EMI, REGS_CLKCTRL_BASE, 0xA0) +#define HW_CLKCTRL_IR_ADDR (REGS_CLKCTRL_BASE + 0xB0) +HW_REGISTER(HW_CLKCTRL_IR, REGS_CLKCTRL_BASE, 0xB0) +#define HW_CLKCTRL_SAIF_ADDR (REGS_CLKCTRL_BASE + 0xC0) +HW_REGISTER(HW_CLKCTRL_SAIF, REGS_CLKCTRL_BASE, 0xC0) +#define HW_CLKCTRL_FRAC_ADDR (REGS_CLKCTRL_BASE + 0xD0) +HW_REGISTER(HW_CLKCTRL_FRAC, REGS_CLKCTRL_BASE, 0xD0) +#define BM_CLKCTRL_FRAC_CLKGATEIO 0x80000000 +#define BM_CLKCTRL_FRAC_IO_STABLE 0x40000000 +#define BM_CLKCTRL_FRAC_IOFRAC 0x3F000000 +#define BP_CLKCTRL_FRAC_IOFRAC 24 +#define BF_CLKCTRL_FRAC_IOFRAC(v) \ + (((v) << BP_CLKCTRL_FRAC_IOFRAC) & BM_CLKCTRL_FRAC_IOFRAC) +#define BM_CLKCTRL_FRAC_CLKGATEPIX 0x00800000 +#define BM_CLKCTRL_FRAC_PIXFRAC 0x003F0000 +#define BP_CLKCTRL_FRAC_PIXFRAC 16 +#define BF_CLKCTRL_FRAC_PIXFRAC(v) \ + (((v) << BP_CLKCTRL_FRAC_PIXFRAC) & BM_CLKCTRL_FRAC_PIXFRAC) +#define BM_CLKCTRL_FRAC_CLKGATEEMI 0x00008000 +#define BM_CLKCTRL_FRAC_EMIFRAC 0x00003F00 +#define BP_CLKCTRL_FRAC_EMIFRAC 8 +#define BF_CLKCTRL_FRAC_EMIFRAC(v) \ + (((v) << BP_CLKCTRL_FRAC_EMIFRAC) & BM_CLKCTRL_FRAC_EMIFRAC) +#define BM_CLKCTRL_FRAC_CLKGATECPU 0x00000080 +#define BM_CLKCTRL_FRAC_CPUFRAC 0x0000003F +#define BP_CLKCTRL_FRAC_CPUFRAC 0 +#define BF_CLKCTRL_FRAC_CPUFRAC(v) \ + (((v) << BP_CLKCTRL_FRAC_CPUFRAC) & BM_CLKCTRL_FRAC_CPUFRAC) +#define HW_CLKCTRL_CLKSEQ_ADDR (REGS_CLKCTRL_BASE + 0xE0) +HW_REGISTER(HW_CLKCTRL_CLKSEQ, REGS_CLKCTRL_BASE, 0xE0) +#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU 0x00000080 +#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI 0x00000040 +#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP 0x00000020 +#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI 0x00000010 +#define BM_CLKCTRL_CLKSEQ_BYPASS_IR 0x00000008 +#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX 0x00000002 +#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF 0x00000001 +HW_REGISTER_WO(HW_CLKCTRL_RESET, REGS_CLKCTRL_BASE, 0xF0) +#define BM_CLKCTRL_RESET_CHIP 0x00000002 +#define BM_CLKCTRL_RESET_DIG 0x00000001 +#endif /* _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H */ diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h b/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h new file mode 100644 index 0000000..8a92f92 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/regs-icoll.h @@ -0,0 +1,36 @@ +/* + * Freescale STMP378X: clock registers definitions + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#ifndef _INCLUDE_ASM_ARCH_REGS_ICOLL_H +#define _INCLUDE_ASM_ARCH_REGS_ICOLL_H + + +#include + +#define REGS_ICOLL_BASE (REGS_BASE + 0x00000000) + +HW_REGISTER(HW_ICOLL_VECTOR, REGS_ICOLL_BASE, 0x00) +HW_REGISTER_WO(HW_ICOLL_LEVELACK, REGS_ICOLL_BASE, 0x10) +HW_REGISTER(HW_ICOLL_CTRL, REGS_ICOLL_BASE, 0x20) +#define BM_ICOLL_CTRL_CLKGATE 0x40000000 +#define BM_ICOLL_CTRL_SFTRST 0x80000000 +HW_REGISTER_RO(HW_ICOLL_STAT, REGS_ICOLL_BASE, 0x30) + +HW_REGISTER_INDEXED(HW_ICOLL_PRIORITYn, REGS_ICOLL_BASE, 0x60, 0x10) + +#endif /* _INCLUDE_ASM_ARCH_REGS_CLKCTRL_H */ diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h b/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h new file mode 100644 index 0000000..b114ecd --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/regs-pinctrl.h @@ -0,0 +1,159 @@ +/* + * STMP pinmux register definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef _INCLUDE_ASM_ARCH_REGS_PINCTRL_H +#define _INCLUDE_ASM_ARCH_REGS_PINCTRL_H + +#include + +#ifndef REGS_PINCTRL_BASE +#define REGS_PINCTRL_BASE (REGS_BASE + 0x00018000) +#endif /* REGS_PINCTRL_BASE */ + +HW_REGISTER(HW_PINCTRL_CTRL, REGS_PINCTRL_BASE, 0) + +#define HW_PINCTRL_MUXSEL0_ADDR (REGS_PINCTRL_BASE + 0x100) +HW_REGISTER(HW_PINCTRL_MUXSEL0, REGS_PINCTRL_BASE, 0x100) +#define HW_PINCTRL_MUXSEL1_ADDR (REGS_PINCTRL_BASE + 0x110) +HW_REGISTER(HW_PINCTRL_MUXSEL1, REGS_PINCTRL_BASE, 0x110) +#define HW_PINCTRL_MUXSEL2_ADDR (REGS_PINCTRL_BASE + 0x120) +HW_REGISTER(HW_PINCTRL_MUXSEL2, REGS_PINCTRL_BASE, 0x120) +#define HW_PINCTRL_MUXSEL3_ADDR (REGS_PINCTRL_BASE + 0x130) +HW_REGISTER(HW_PINCTRL_MUXSEL3, REGS_PINCTRL_BASE, 0x130) +#define BM_PINCTRL_MUXSEL3_BANK1_PIN28 0x03000000 +#define HW_PINCTRL_MUXSEL4_ADDR (REGS_PINCTRL_BASE + 0x140) +HW_REGISTER(HW_PINCTRL_MUXSEL4, REGS_PINCTRL_BASE, 0x140) +#define BM_PINCTRL_MUXSEL4_BANK2_PIN03 0x000000C0 +#define BM_PINCTRL_MUXSEL4_BANK2_PIN04 0x00000300 +#define HW_PINCTRL_MUXSEL5_ADDR (REGS_PINCTRL_BASE + 0x150) +HW_REGISTER(HW_PINCTRL_MUXSEL5, REGS_PINCTRL_BASE, 0x150) +#define HW_PINCTRL_MUXSEL6_ADDR (REGS_PINCTRL_BASE + 0x160) +HW_REGISTER(HW_PINCTRL_MUXSEL6, REGS_PINCTRL_BASE, 0x160) +#define HW_PINCTRL_MUXSEL7_ADDR (REGS_PINCTRL_BASE + 0x170) +HW_REGISTER(HW_PINCTRL_MUXSEL7, REGS_PINCTRL_BASE, 0x170) + +HW_REGISTER(HW_PINCTRL_DRIVE0, REGS_PINCTRL_BASE, 0x200) +#define HW_PINCTRL_DRIVE0_ADDR (REGS_PINCTRL_BASE + 0x200) +HW_REGISTER(HW_PINCTRL_DRIVE1, REGS_PINCTRL_BASE, 0x210) +#define HW_PINCTRL_DRIVE1_ADDR (REGS_PINCTRL_BASE + 0x210) +HW_REGISTER(HW_PINCTRL_DRIVE2, REGS_PINCTRL_BASE, 0x220) +#define HW_PINCTRL_DRIVE2_ADDR (REGS_PINCTRL_BASE + 0x220) +HW_REGISTER(HW_PINCTRL_DRIVE3, REGS_PINCTRL_BASE, 0x230) +#define HW_PINCTRL_DRIVE3_ADDR (REGS_PINCTRL_BASE + 0x230) +HW_REGISTER(HW_PINCTRL_DRIVE4, REGS_PINCTRL_BASE, 0x240) +#define HW_PINCTRL_DRIVE4_ADDR (REGS_PINCTRL_BASE + 0x240) +HW_REGISTER(HW_PINCTRL_DRIVE5, REGS_PINCTRL_BASE, 0x250) +#define HW_PINCTRL_DRIVE5_ADDR (REGS_PINCTRL_BASE + 0x250) +HW_REGISTER(HW_PINCTRL_DRIVE6, REGS_PINCTRL_BASE, 0x260) +#define HW_PINCTRL_DRIVE6_ADDR (REGS_PINCTRL_BASE + 0x260) +HW_REGISTER(HW_PINCTRL_DRIVE7, REGS_PINCTRL_BASE, 0x270) +#define HW_PINCTRL_DRIVE7_ADDR (REGS_PINCTRL_BASE + 0x270) +HW_REGISTER(HW_PINCTRL_DRIVE8, REGS_PINCTRL_BASE, 0x280) +#define HW_PINCTRL_DRIVE8_ADDR (REGS_PINCTRL_BASE + 0x280) +HW_REGISTER(HW_PINCTRL_DRIVE9, REGS_PINCTRL_BASE, 0x290) +#define HW_PINCTRL_DRIVE9_ADDR (REGS_PINCTRL_BASE + 0x290) +HW_REGISTER(HW_PINCTRL_DRIVE10, REGS_PINCTRL_BASE, 0x2a0) +#define HW_PINCTRL_DRIVE10_ADDR (REGS_PINCTRL_BASE + 0x2a0) +HW_REGISTER(HW_PINCTRL_DRIVE11, REGS_PINCTRL_BASE, 0x2b0) +#define HW_PINCTRL_DRIVE11_ADDR (REGS_PINCTRL_BASE + 0x2b0) +HW_REGISTER(HW_PINCTRL_DRIVE12, REGS_PINCTRL_BASE, 0x2c0) +#define HW_PINCTRL_DRIVE12_ADDR (REGS_PINCTRL_BASE + 0x2c0) +HW_REGISTER(HW_PINCTRL_DRIVE13, REGS_PINCTRL_BASE, 0x2d0) +#define HW_PINCTRL_DRIVE13_ADDR (REGS_PINCTRL_BASE + 0x2d0) +HW_REGISTER(HW_PINCTRL_DRIVE14, REGS_PINCTRL_BASE, 0x2e0) +#define HW_PINCTRL_DRIVE14_ADDR (REGS_PINCTRL_BASE + 0x2e0) + + +HW_REGISTER(HW_PINCTRL_PULL0, REGS_PINCTRL_BASE, 0x300) +#define HW_PINCTRL_PULL0_ADDR (REGS_PINCTRL_BASE + 0x300) +#define BM_PINCTRL_PULL0_BANK0_PIN01 0x00000002 +#define BM_PINCTRL_PULL0_BANK0_PIN02 0x00000004 +#define BM_PINCTRL_PULL0_BANK0_PIN03 0x00000008 +#define BM_PINCTRL_PULL0_BANK0_PIN04 0x00000010 +#define BM_PINCTRL_PULL0_BANK0_PIN20 0x00100000 +HW_REGISTER(HW_PINCTRL_PULL1, REGS_PINCTRL_BASE, 0x310) +#define HW_PINCTRL_PULL1_ADDR (REGS_PINCTRL_BASE + 0x310) +#define BM_PINCTRL_PULL1_BANK1_PIN22 0x00400000 +#define BM_PINCTRL_PULL1_BANK1_PIN24 0x01000000 +#define BM_PINCTRL_PULL1_BANK1_PIN25 0x02000000 +#define BM_PINCTRL_PULL1_BANK1_PIN26 0x04000000 +#define BM_PINCTRL_PULL1_BANK1_PIN27 0x08000000 +HW_REGISTER(HW_PINCTRL_PULL2, REGS_PINCTRL_BASE, 0x320) +#define HW_PINCTRL_PULL2_ADDR (REGS_PINCTRL_BASE + 0x320) +HW_REGISTER(HW_PINCTRL_PULL3, REGS_PINCTRL_BASE, 0x330) +#define HW_PINCTRL_PULL3_ADDR (REGS_PINCTRL_BASE + 0x330) + +#define HW_PINCTRL_DOUT0_ADDR (REGS_PINCTRL_BASE + 0x400) +HW_REGISTER(HW_PINCTRL_DOUT0, REGS_PINCTRL_BASE, 0x400) +#define HW_PINCTRL_DOUT1_ADDR (REGS_PINCTRL_BASE + 0x410) +HW_REGISTER(HW_PINCTRL_DOUT1, REGS_PINCTRL_BASE, 0x410) +#define HW_PINCTRL_DOUT2_ADDR (REGS_PINCTRL_BASE + 0x420) +HW_REGISTER(HW_PINCTRL_DOUT2, REGS_PINCTRL_BASE, 0x420) + +#define HW_PINCTRL_DIN0_ADDR (REGS_PINCTRL_BASE + 0x500) +HW_REGISTER_RO(HW_PINCTRL_DIN0, REGS_PINCTRL_BASE, 0x500) +#define HW_PINCTRL_DIN1_ADDR (REGS_PINCTRL_BASE + 0x510) +HW_REGISTER_RO(HW_PINCTRL_DIN1, REGS_PINCTRL_BASE, 0x510) +#define HW_PINCTRL_DIN2_ADDR (REGS_PINCTRL_BASE + 0x520) +HW_REGISTER_RO(HW_PINCTRL_DIN2, REGS_PINCTRL_BASE, 0x520) + +#define HW_PINCTRL_DOE0_ADDR (REGS_PINCTRL_BASE + 0x600) +HW_REGISTER(HW_PINCTRL_DOE0, REGS_PINCTRL_BASE, 0x600) +#define HW_PINCTRL_DOE1_ADDR (REGS_PINCTRL_BASE + 0x610) +HW_REGISTER(HW_PINCTRL_DOE1, REGS_PINCTRL_BASE, 0x610) +#define HW_PINCTRL_DOE2_ADDR (REGS_PINCTRL_BASE + 0x620) +HW_REGISTER(HW_PINCTRL_DOE2, REGS_PINCTRL_BASE, 0x620) + +HW_REGISTER(HW_PINCTRL_PIN2IRQ0, REGS_PINCTRL_BASE, 0x700) +#define HW_PINCTRL_PIN2IRQ0_ADDR (REGS_PINCTRL_BASE + 0x700) +HW_REGISTER(HW_PINCTRL_PIN2IRQ1, REGS_PINCTRL_BASE, 0x710) +#define HW_PINCTRL_PIN2IRQ1_ADDR (REGS_PINCTRL_BASE + 0x710) +HW_REGISTER(HW_PINCTRL_PIN2IRQ2, REGS_PINCTRL_BASE, 0x720) +#define HW_PINCTRL_PIN2IRQ2_ADDR (REGS_PINCTRL_BASE + 0x720) + +HW_REGISTER(HW_PINCTRL_IRQEN0, REGS_PINCTRL_BASE, 0x800) +#define HW_PINCTRL_IRQEN0_ADDR (REGS_PINCTRL_BASE + 0x800) +HW_REGISTER(HW_PINCTRL_IRQEN1, REGS_PINCTRL_BASE, 0x810) +#define HW_PINCTRL_IRQEN1_ADDR (REGS_PINCTRL_BASE + 0x810) +HW_REGISTER(HW_PINCTRL_IRQEN2, REGS_PINCTRL_BASE, 0x820) +#define HW_PINCTRL_IRQEN2_ADDR (REGS_PINCTRL_BASE + 0x820) + +HW_REGISTER(HW_PINCTRL_IRQLEVEL0, REGS_PINCTRL_BASE, 0x900) +#define HW_PINCTRL_IRQLEVEL0_ADDR (REGS_PINCTRL_BASE + 0x900) +HW_REGISTER(HW_PINCTRL_IRQLEVEL1, REGS_PINCTRL_BASE, 0x910) +#define HW_PINCTRL_IRQLEVEL1_ADDR (REGS_PINCTRL_BASE + 0x910) +HW_REGISTER(HW_PINCTRL_IRQLEVEL2, REGS_PINCTRL_BASE, 0x920) +#define HW_PINCTRL_IRQLEVEL2_ADDR (REGS_PINCTRL_BASE + 0x920) + +HW_REGISTER(HW_PINCTRL_IRQPOL0, REGS_PINCTRL_BASE, 0xA00) +#define HW_PINCTRL_IRQPOL0_ADDR (REGS_PINCTRL_BASE + 0xa00) +HW_REGISTER(HW_PINCTRL_IRQPOL1, REGS_PINCTRL_BASE, 0xA10) +#define HW_PINCTRL_IRQPOL1_ADDR (REGS_PINCTRL_BASE + 0xa10) +HW_REGISTER(HW_PINCTRL_IRQPOL2, REGS_PINCTRL_BASE, 0xA20) +#define HW_PINCTRL_IRQPOL2_ADDR (REGS_PINCTRL_BASE + 0xa20) + +HW_REGISTER(HW_PINCTRL_IRQSTAT0, REGS_PINCTRL_BASE, 0xB00) +#define HW_PINCTRL_IRQSTAT0_ADDR (REGS_PINCTRL_BASE + 0xb00) +HW_REGISTER(HW_PINCTRL_IRQSTAT1, REGS_PINCTRL_BASE, 0xB10) +#define HW_PINCTRL_IRQSTAT1_ADDR (REGS_PINCTRL_BASE + 0xb10) +HW_REGISTER(HW_PINCTRL_IRQSTAT2, REGS_PINCTRL_BASE, 0xB20) +#define HW_PINCTRL_IRQSTAT2_ADDR (REGS_PINCTRL_BASE + 0xb20) + +#endif /* _INCLUDE_ASM_ARCH_REGS_PINCTRL_H */ + diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-power.h b/arch/arm/mach-stmp37xx/include/mach/regs-power.h new file mode 100644 index 0000000..d15cd66 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/regs-power.h @@ -0,0 +1,31 @@ +/* + * STMP POWER Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ARCH_ARM___POWER_H +#define __ARCH_ARM___POWER_H 1 + +#include + +#define REGS_POWER_BASE (void __iomem *)(REGS_BASE + 0x44000) +#define REGS_POWER_BASE_PHYS (0x80044000) +#define REGS_POWER_SIZE 0x00002000 +HW_REGISTER(HW_POWER_MINPWR, REGS_POWER_BASE, 0x00000020) +HW_REGISTER(HW_POWER_CHARGE, REGS_POWER_BASE, 0x00000030) +#endif /* __ARCH_ARM___POWER_H */ diff --git a/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h b/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h new file mode 100644 index 0000000..7f00030 --- /dev/null +++ b/arch/arm/mach-stmp37xx/include/mach/regs-timrot.h @@ -0,0 +1,52 @@ +/* + * include/asm-arm/arch-stmp3xxx/regstimer.h + * + * Copyright (c) 2008 SigmaTel Inc + * Copyright (c) 2008 Embedded Alley Solutions, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __ARCH_ARM_REGS_TIMROT_H +#define __ARCH_ARM_REGS_TIMROT_H + +#include + +#define REGS_TIMROT_BASE (REGS_BASE + 0x00068000) + +HW_REGISTER(HW_TIMROT_ROTCTRL, REGS_TIMROT_BASE, 0) +#define BM_TIMROT_ROTCTRL_SFTRST 0x80000000 +#define BM_TIMROT_ROTCTRL_CLKGATE 0x40000000 + +HW_REGISTER_INDEXED(HW_TIMROT_TIMCTRLn, REGS_TIMROT_BASE, 0x20, 0x20) +#define BM_TIMROT_TIMCTRLn_SELECT 0x0000000F +#define BF_TIMROT_TIMCTRLn_SELECT(v) (((v) << 0) & BM_TIMROT_TIMCTRLn_SELECT) +#define BM_TIMROT_TIMCTRLn_PRESCALE 0x00000030 +#define BF_TIMROT_TIMCTRLn_PRESCALE(v) \ + (((v) << 4) & BM_TIMROT_TIMCTRLn_PRESCALE) +#define BM_TIMROT_TIMCTRLn_RELOAD 0x00000040 +#define BF_TIMROT_TIMCTRLn_RELOAD(v) (((v) << 6) & BM_TIMROT_TIMCTRLn_RELOAD) +#define BM_TIMROT_TIMCTRLn_UPDATE 0x00000080 +#define BF_TIMROT_TIMCTRLn_UPDATE(v) (((v) << 7) & BM_TIMROT_TIMCTRLn_UPDATE) +#define BM_TIMROT_TIMCTRLn_POLARITY 0x00000100 +#define BF_TIMROT_TIMCTRLn_POLARITY(v) \ + (((v) << 8) & BM_TIMROT_TIMCTRLn_POLARITY) +#define BM_TIMROT_TIMCTRLn_IRQ_EN 0x00004000 +#define BF_TIMROT_TIMCTRLn_IRQ_EN(v) \ + (((v) << 14) & BM_TIMROT_TIMCTRLn_IRQ_EN) +#define BM_TIMROT_TIMCTRLn_IRQ 0x00008000 +#define BF_TIMROT_TIMCTRLn_IRQ(v) (((v) << 15) & BM_TIMROT_TIMCTRLn_IRQ) +HW_REGISTER_0_INDEXED(HW_TIMROT_TIMCOUNTn, REGS_TIMROT_BASE, 0x30, 0x20) + +#endif /* __ARCH_ARM_REGSTIMER_H */ -- cgit v0.10.2 From 07d9714365bcab286389d679f73512e35796847c Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Wed, 22 Apr 2009 23:54:42 +0100 Subject: [ARM] 5467/1: Freescale STMP platform support [4/10] Minimal definition of register set for 378x boards Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/mach-stmp378x/include/mach/regs-apbh.h b/arch/arm/mach-stmp378x/include/mach/regs-apbh.h new file mode 100644 index 0000000..db63b04 --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/regs-apbh.h @@ -0,0 +1,88 @@ +/* + * STMP APBH Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ARCH_ARM___APBH_H +#define __ARCH_ARM___APBH_H 1 + +#include + +#define REGS_APBH_BASE (REGS_BASE + 0x4000) +#define REGS_APBH_BASE_PHYS (0x80004000) +#define REGS_APBH_SIZE 0x00002000 +HW_REGISTER(HW_APBH_CTRL0, REGS_APBH_BASE, 0x00000000) +#define HW_APBH_CTRL0_ADDR (REGS_APBH_BASE + 0x00000000) +#define BM_APBH_CTRL0_SFTRST 0x80000000 +#define BM_APBH_CTRL0_CLKGATE 0x40000000 +#define BM_APBH_CTRL0_AHB_BURST8_EN 0x20000000 +#define BM_APBH_CTRL0_APB_BURST4_EN 0x10000000 +#define BP_APBH_CTRL0_RESET_CHANNEL 16 +#define BM_APBH_CTRL0_RESET_CHANNEL 0x00FF0000 +#define BF_APBH_CTRL0_RESET_CHANNEL(v) \ + (((v) << 16) & BM_APBH_CTRL0_RESET_CHANNEL) +HW_REGISTER(HW_APBH_CTRL1, REGS_APBH_BASE, 0x00000010) +#define HW_APBH_CTRL1_ADDR (REGS_APBH_BASE + 0x00000010) +HW_REGISTER(HW_APBH_CTRL2, REGS_APBH_BASE, 0x00000020) +HW_REGISTER_0(HW_APBH_DEVSEL, REGS_APBH_BASE, 0x00000030) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_CURCMDAR, REGS_APBH_BASE, 0x00000040, 0x70) +#define BP_APBH_CHn_CURCMDAR_CMD_ADDR 0 +#define BM_APBH_CHn_CURCMDAR_CMD_ADDR 0xFFFFFFFF +#define BF_APBH_CHn_CURCMDAR_CMD_ADDR(v) (v) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_NXTCMDAR, REGS_APBH_BASE, 0x00000050, 0x70) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_CMD, REGS_APBH_BASE, 0x00000060, 0x70) +#define BP_APBH_CHn_CMD_XFER_COUNT 16 +#define BM_APBH_CHn_CMD_XFER_COUNT 0xFFFF0000 +#define BF_APBH_CHn_CMD_XFER_COUNT(v) \ + (((v) << 16) & BM_APBH_CHn_CMD_XFER_COUNT) +#define BP_APBH_CHn_CMD_CMDWORDS 12 +#define BM_APBH_CHn_CMD_CMDWORDS 0x0000F000 +#define BF_APBH_CHn_CMD_CMDWORDS(v) \ + (((v) << 12) & BM_APBH_CHn_CMD_CMDWORDS) +#define BM_APBH_CHn_CMD_HALTONTERMINATE 0x00000100 +#define BM_APBH_CHn_CMD_WAIT4ENDCMD 0x00000080 +#define BM_APBH_CHn_CMD_SEMAPHORE 0x00000040 +#define BM_APBH_CHn_CMD_NANDWAIT4READY 0x00000020 +#define BM_APBH_CHn_CMD_NANDLOCK 0x00000010 +#define BM_APBH_CHn_CMD_IRQONCMPLT 0x00000008 +#define BM_APBH_CHn_CMD_CHAIN 0x00000004 +#define BP_APBH_CHn_CMD_COMMAND 0 +#define BM_APBH_CHn_CMD_COMMAND 0x00000003 +#define BF_APBH_CHn_CMD_COMMAND(v) \ + (((v) << 0) & BM_APBH_CHn_CMD_COMMAND) +#define BV_APBH_CHn_CMD_COMMAND__NO_DMA_XFER 0x0 +#define BV_APBH_CHn_CMD_COMMAND__DMA_WRITE 0x1 +#define BV_APBH_CHn_CMD_COMMAND__DMA_READ 0x2 +#define BV_APBH_CHn_CMD_COMMAND__DMA_SENSE 0x3 +HW_REGISTER_0_INDEXED(HW_APBH_CHn_BAR, REGS_APBH_BASE, 0x00000070, 0x70) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_SEMA, REGS_APBH_BASE, 0x00000080, 0x70) +#define BP_APBH_CHn_SEMA_PHORE 16 +#define BM_APBH_CHn_SEMA_PHORE 0x00FF0000 +#define BF_APBH_CHn_SEMA_PHORE(v) \ + (((v) << 16) & BM_APBH_CHn_SEMA_PHORE) +#define BP_APBH_CHn_SEMA_INCREMENT_SEMA 0 +#define BM_APBH_CHn_SEMA_INCREMENT_SEMA 0x000000FF +#define BF_APBH_CHn_SEMA_INCREMENT_SEMA(v) \ + (((v) << 0) & BM_APBH_CHn_SEMA_INCREMENT_SEMA) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_DEBUG1, REGS_APBH_BASE, 0x00000090, 0x70) +HW_REGISTER_0_INDEXED(HW_APBH_CHn_DEBUG2, REGS_APBH_BASE, 0x000000a0, 0x70) +HW_REGISTER_0(HW_APBH_VERSION, REGS_APBH_BASE, 0x000003f0) +#endif /* __ARCH_ARM___APBH_H */ diff --git a/arch/arm/mach-stmp378x/include/mach/regs-apbx.h b/arch/arm/mach-stmp378x/include/mach/regs-apbx.h new file mode 100644 index 0000000..d0e8e9f --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/regs-apbx.h @@ -0,0 +1,79 @@ +/* + * STMP APBX Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __ARCH_ARM___APBX_H +#define __ARCH_ARM___APBX_H 1 + +#include + +#define REGS_APBX_BASE (REGS_BASE + 0x24000) +#define REGS_APBX_BASE_PHYS (0x80024000) +#define REGS_APBX_SIZE 0x00002000 +HW_REGISTER(HW_APBX_CTRL0, REGS_APBX_BASE, 0x00000000) +#define HW_APBX_CTRL0_ADDR (REGS_APBX_BASE + 0x00000000) +#define BM_APBX_CTRL0_SFTRST 0x80000000 +#define BM_APBX_CTRL0_CLKGATE 0x40000000 +HW_REGISTER(HW_APBX_CTRL1, REGS_APBX_BASE, 0x00000010) +HW_REGISTER(HW_APBX_CTRL2, REGS_APBX_BASE, 0x00000020) +HW_REGISTER(HW_APBX_CHANNEL_CTRL, REGS_APBX_BASE, 0x00000030) +#define BP_APBX_CHANNEL_CTRL_RESET_CHANNEL 16 +#define BM_APBX_CHANNEL_CTRL_RESET_CHANNEL 0xFFFF0000 +#define BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(v) \ + (((v) << BP_APBX_CHANNEL_CTRL_RESET_CHANNEL) & \ + BM_APBX_CHANNEL_CTRL_RESET_CHANNEL) +HW_REGISTER_0(HW_APBX_DEVSEL, REGS_APBX_BASE, 0x00000040) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_CURCMDAR, REGS_APBX_BASE, 0x00000100, 0x70) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_NXTCMDAR, REGS_APBX_BASE, 0x00000110, 0x70) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_CMD, REGS_APBX_BASE, 0x00000120, 0x70) +#define BP_APBX_CHn_CMD_XFER_COUNT 16 +#define BM_APBX_CHn_CMD_XFER_COUNT 0xFFFF0000 +#define BF_APBX_CHn_CMD_XFER_COUNT(v) \ + (((v) << 16) & BM_APBX_CHn_CMD_XFER_COUNT) +#define BP_APBX_CHn_CMD_CMDWORDS 12 +#define BM_APBX_CHn_CMD_CMDWORDS 0x0000F000 +#define BF_APBX_CHn_CMD_CMDWORDS(v) \ + (((v) << 12) & BM_APBX_CHn_CMD_CMDWORDS) +#define BM_APBX_CHn_CMD_HALTONTERMINATE 0x00000100 +#define BM_APBX_CHn_CMD_WAIT4ENDCMD 0x00000080 +#define BM_APBX_CHn_CMD_SEMAPHORE 0x00000040 +#define BM_APBX_CHn_CMD_IRQONCMPLT 0x00000008 +#define BM_APBX_CHn_CMD_CHAIN 0x00000004 +#define BP_APBX_CHn_CMD_COMMAND 0 +#define BM_APBX_CHn_CMD_COMMAND 0x00000003 +#define BF_APBX_CHn_CMD_COMMAND(v) \ + (((v) << 0) & BM_APBX_CHn_CMD_COMMAND) +#define BV_APBX_CHn_CMD_COMMAND__NO_DMA_XFER 0x0 +#define BV_APBX_CHn_CMD_COMMAND__DMA_WRITE 0x1 +#define BV_APBX_CHn_CMD_COMMAND__DMA_READ 0x2 +HW_REGISTER_0_INDEXED(HW_APBX_CHn_BAR, REGS_APBX_BASE, 0x00000130, 0x70) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_SEMA, REGS_APBX_BASE, 0x00000140, 0x70) +#define BP_APBX_CHn_SEMA_PHORE 16 +#define BM_APBX_CHn_SEMA_PHORE 0x00FF0000 +#define BF_APBX_CHn_SEMA_PHORE(v) \ + (((v) << 16) & BM_APBX_CHn_SEMA_PHORE) +#define BP_APBX_CHn_SEMA_INCREMENT_SEMA 0 +#define BM_APBX_CHn_SEMA_INCREMENT_SEMA 0x000000FF +#define BF_APBX_CHn_SEMA_INCREMENT_SEMA(v) \ + (((v) << 0) & BM_APBX_CHn_SEMA_INCREMENT_SEMA) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_DEBUG1, REGS_APBX_BASE, 0x00000150, 0x70) +HW_REGISTER_0_INDEXED(HW_APBX_CHn_DEBUG2, REGS_APBX_BASE, 0x00000160, 0x70) +HW_REGISTER_0(HW_APBX_VERSION, REGS_APBX_BASE, 0x00000800) +#endif /* __ARCH_ARM___APBX_H */ diff --git a/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h b/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h new file mode 100644 index 0000000..a421d9e --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/regs-clkctrl.h @@ -0,0 +1,276 @@ +/* + * STMP CLKCTRL Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ARCH_ARM___CLKCTRL_H +#define __ARCH_ARM___CLKCTRL_H 1 + +#include + +#define REGS_CLKCTRL_BASE (REGS_BASE + 0x40000) +#define REGS_CLKCTRL_BASE_PHYS (0x80040000) +#define REGS_CLKCTRL_SIZE 0x00002000 +HW_REGISTER(HW_CLKCTRL_PLLCTRL0, REGS_CLKCTRL_BASE, 0x00000000) +#define HW_CLKCTRL_PLLCTRL0_ADDR (REGS_CLKCTRL_BASE + 0x00000000) +#define BP_CLKCTRL_PLLCTRL0_LFR_SEL 28 +#define BM_CLKCTRL_PLLCTRL0_LFR_SEL 0x30000000 +#define BF_CLKCTRL_PLLCTRL0_LFR_SEL(v) \ + (((v) << 28) & BM_CLKCTRL_PLLCTRL0_LFR_SEL) +#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__DEFAULT 0x0 +#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_2 0x1 +#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__TIMES_05 0x2 +#define BV_CLKCTRL_PLLCTRL0_LFR_SEL__UNDEFINED 0x3 +#define BP_CLKCTRL_PLLCTRL0_CP_SEL 24 +#define BM_CLKCTRL_PLLCTRL0_CP_SEL 0x03000000 +#define BF_CLKCTRL_PLLCTRL0_CP_SEL(v) \ + (((v) << 24) & BM_CLKCTRL_PLLCTRL0_CP_SEL) +#define BV_CLKCTRL_PLLCTRL0_CP_SEL__DEFAULT 0x0 +#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_2 0x1 +#define BV_CLKCTRL_PLLCTRL0_CP_SEL__TIMES_05 0x2 +#define BV_CLKCTRL_PLLCTRL0_CP_SEL__UNDEFINED 0x3 +#define BP_CLKCTRL_PLLCTRL0_DIV_SEL 20 +#define BM_CLKCTRL_PLLCTRL0_DIV_SEL 0x00300000 +#define BF_CLKCTRL_PLLCTRL0_DIV_SEL(v) \ + (((v) << 20) & BM_CLKCTRL_PLLCTRL0_DIV_SEL) +#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__DEFAULT 0x0 +#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWER 0x1 +#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__LOWEST 0x2 +#define BV_CLKCTRL_PLLCTRL0_DIV_SEL__UNDEFINED 0x3 +#define BM_CLKCTRL_PLLCTRL0_EN_USB_CLKS 0x00040000 +#define BM_CLKCTRL_PLLCTRL0_POWER 0x00010000 +HW_REGISTER_0(HW_CLKCTRL_PLLCTRL1, REGS_CLKCTRL_BASE, 0x00000010) +#define HW_CLKCTRL_PLLCTRL1_ADDR (REGS_CLKCTRL_BASE + 0x00000010) +#define BM_CLKCTRL_PLLCTRL1_LOCK 0x80000000 +#define BM_CLKCTRL_PLLCTRL1_FORCE_LOCK 0x40000000 +#define BP_CLKCTRL_PLLCTRL1_LOCK_COUNT 0 +#define BM_CLKCTRL_PLLCTRL1_LOCK_COUNT 0x0000FFFF +#define BF_CLKCTRL_PLLCTRL1_LOCK_COUNT(v) \ + (((v) << 0) & BM_CLKCTRL_PLLCTRL1_LOCK_COUNT) +HW_REGISTER(HW_CLKCTRL_CPU, REGS_CLKCTRL_BASE, 0x00000020) +#define HW_CLKCTRL_CPU_ADDR (REGS_CLKCTRL_BASE + 0x00000020) +#define BM_CLKCTRL_CPU_BUSY_REF_XTAL 0x20000000 +#define BM_CLKCTRL_CPU_BUSY_REF_CPU 0x10000000 +#define BM_CLKCTRL_CPU_DIV_XTAL_FRAC_EN 0x04000000 +#define BP_CLKCTRL_CPU_DIV_XTAL 16 +#define BM_CLKCTRL_CPU_DIV_XTAL 0x03FF0000 +#define BF_CLKCTRL_CPU_DIV_XTAL(v) \ + (((v) << 16) & BM_CLKCTRL_CPU_DIV_XTAL) +#define BM_CLKCTRL_CPU_INTERRUPT_WAIT 0x00001000 +#define BM_CLKCTRL_CPU_DIV_CPU_FRAC_EN 0x00000400 +#define BP_CLKCTRL_CPU_DIV_CPU 0 +#define BM_CLKCTRL_CPU_DIV_CPU 0x0000003F +#define BF_CLKCTRL_CPU_DIV_CPU(v) \ + (((v) << 0) & BM_CLKCTRL_CPU_DIV_CPU) +HW_REGISTER(HW_CLKCTRL_HBUS, REGS_CLKCTRL_BASE, 0x00000030) +#define HW_CLKCTRL_HBUS_ADDR (REGS_CLKCTRL_BASE + 0x00000030) +#define BM_CLKCTRL_HBUS_BUSY 0x20000000 +#define BM_CLKCTRL_HBUS_DCP_AS_ENABLE 0x10000000 +#define BM_CLKCTRL_HBUS_PXP_AS_ENABLE 0x08000000 +#define BM_CLKCTRL_HBUS_APBHDMA_AS_ENABLE 0x04000000 +#define BM_CLKCTRL_HBUS_APBXDMA_AS_ENABLE 0x02000000 +#define BM_CLKCTRL_HBUS_TRAFFIC_JAM_AS_ENABLE 0x01000000 +#define BM_CLKCTRL_HBUS_TRAFFIC_AS_ENABLE 0x00800000 +#define BM_CLKCTRL_HBUS_CPU_DATA_AS_ENABLE 0x00400000 +#define BM_CLKCTRL_HBUS_CPU_INSTR_AS_ENABLE 0x00200000 +#define BM_CLKCTRL_HBUS_AUTO_SLOW_MODE 0x00100000 +#define BP_CLKCTRL_HBUS_SLOW_DIV 16 +#define BM_CLKCTRL_HBUS_SLOW_DIV 0x00070000 +#define BF_CLKCTRL_HBUS_SLOW_DIV(v) \ + (((v) << 16) & BM_CLKCTRL_HBUS_SLOW_DIV) +#define BV_CLKCTRL_HBUS_SLOW_DIV__BY1 0x0 +#define BV_CLKCTRL_HBUS_SLOW_DIV__BY2 0x1 +#define BV_CLKCTRL_HBUS_SLOW_DIV__BY4 0x2 +#define BV_CLKCTRL_HBUS_SLOW_DIV__BY8 0x3 +#define BV_CLKCTRL_HBUS_SLOW_DIV__BY16 0x4 +#define BV_CLKCTRL_HBUS_SLOW_DIV__BY32 0x5 +#define BM_CLKCTRL_HBUS_DIV_FRAC_EN 0x00000020 +#define BP_CLKCTRL_HBUS_DIV 0 +#define BM_CLKCTRL_HBUS_DIV 0x0000001F +#define BF_CLKCTRL_HBUS_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_HBUS_DIV) +HW_REGISTER_0(HW_CLKCTRL_XBUS, REGS_CLKCTRL_BASE, 0x00000040) +#define HW_CLKCTRL_XBUS_ADDR (REGS_CLKCTRL_BASE + 0x00000040) +#define BM_CLKCTRL_XBUS_BUSY 0x80000000 +#define BM_CLKCTRL_XBUS_DIV_FRAC_EN 0x00000400 +#define BP_CLKCTRL_XBUS_DIV 0 +#define BM_CLKCTRL_XBUS_DIV 0x000003FF +#define BF_CLKCTRL_XBUS_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_XBUS_DIV) +HW_REGISTER(HW_CLKCTRL_XTAL, REGS_CLKCTRL_BASE, 0x00000050) +#define HW_CLKCTRL_XTAL_ADDR (REGS_CLKCTRL_BASE + 0x00000050) +#define BM_CLKCTRL_XTAL_UART_CLK_GATE 0x80000000 +#define BM_CLKCTRL_XTAL_FILT_CLK24M_GATE 0x40000000 +#define BM_CLKCTRL_XTAL_PWM_CLK24M_GATE 0x20000000 +#define BM_CLKCTRL_XTAL_DRI_CLK24M_GATE 0x10000000 +#define BM_CLKCTRL_XTAL_DIGCTRL_CLK1M_GATE 0x08000000 +#define BM_CLKCTRL_XTAL_TIMROT_CLK32K_GATE 0x04000000 +#define BP_CLKCTRL_XTAL_DIV_UART 0 +#define BM_CLKCTRL_XTAL_DIV_UART 0x00000003 +#define BF_CLKCTRL_XTAL_DIV_UART(v) \ + (((v) << 0) & BM_CLKCTRL_XTAL_DIV_UART) +HW_REGISTER_0(HW_CLKCTRL_PIX, REGS_CLKCTRL_BASE, 0x00000060) +#define HW_CLKCTRL_PIX_ADDR (REGS_CLKCTRL_BASE + 0x00000060) +#define BM_CLKCTRL_PIX_CLKGATE 0x80000000 +#define BM_CLKCTRL_PIX_BUSY 0x20000000 +#define BM_CLKCTRL_PIX_DIV_FRAC_EN 0x00001000 +#define BP_CLKCTRL_PIX_DIV 0 +#define BM_CLKCTRL_PIX_DIV 0x00000FFF +#define BF_CLKCTRL_PIX_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_PIX_DIV) +HW_REGISTER_0(HW_CLKCTRL_SSP, REGS_CLKCTRL_BASE, 0x00000070) +#define HW_CLKCTRL_SSP_ADDR (REGS_CLKCTRL_BASE + 0x00000070) +#define BM_CLKCTRL_SSP_CLKGATE 0x80000000 +#define BM_CLKCTRL_SSP_BUSY 0x20000000 +#define BM_CLKCTRL_SSP_DIV_FRAC_EN 0x00000200 +#define BP_CLKCTRL_SSP_DIV 0 +#define BM_CLKCTRL_SSP_DIV 0x000001FF +#define BF_CLKCTRL_SSP_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_SSP_DIV) +HW_REGISTER_0(HW_CLKCTRL_GPMI, REGS_CLKCTRL_BASE, 0x00000080) +#define HW_CLKCTRL_GPMI_ADDR (REGS_CLKCTRL_BASE + 0x00000080) +#define BM_CLKCTRL_GPMI_CLKGATE 0x80000000 +#define BM_CLKCTRL_GPMI_BUSY 0x20000000 +#define BM_CLKCTRL_GPMI_DIV_FRAC_EN 0x00000400 +#define BP_CLKCTRL_GPMI_DIV 0 +#define BM_CLKCTRL_GPMI_DIV 0x000003FF +#define BF_CLKCTRL_GPMI_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_GPMI_DIV) +HW_REGISTER_0(HW_CLKCTRL_SPDIF, REGS_CLKCTRL_BASE, 0x00000090) +#define HW_CLKCTRL_SPDIF_ADDR (REGS_CLKCTRL_BASE + 0x00000090) +#define BM_CLKCTRL_SPDIF_CLKGATE 0x80000000 +HW_REGISTER_0(HW_CLKCTRL_EMI, REGS_CLKCTRL_BASE, 0x000000a0) +#define HW_CLKCTRL_EMI_ADDR (REGS_CLKCTRL_BASE + 0x000000a0) +#define BM_CLKCTRL_EMI_CLKGATE 0x80000000 +#define BM_CLKCTRL_EMI_SYNC_MODE_EN 0x40000000 +#define BM_CLKCTRL_EMI_BUSY_REF_XTAL 0x20000000 +#define BM_CLKCTRL_EMI_BUSY_REF_EMI 0x10000000 +#define BM_CLKCTRL_EMI_BUSY_REF_CPU 0x08000000 +#define BM_CLKCTRL_EMI_BUSY_SYNC_MODE 0x04000000 +#define BM_CLKCTRL_EMI_BUSY_DCC_RESYNC 0x00020000 +#define BM_CLKCTRL_EMI_DCC_RESYNC_ENABLE 0x00010000 +#define BP_CLKCTRL_EMI_DIV_XTAL 8 +#define BM_CLKCTRL_EMI_DIV_XTAL 0x00000F00 +#define BF_CLKCTRL_EMI_DIV_XTAL(v) \ + (((v) << 8) & BM_CLKCTRL_EMI_DIV_XTAL) +#define BP_CLKCTRL_EMI_DIV_EMI 0 +#define BM_CLKCTRL_EMI_DIV_EMI 0x0000003F +#define BF_CLKCTRL_EMI_DIV_EMI(v) \ + (((v) << 0) & BM_CLKCTRL_EMI_DIV_EMI) +HW_REGISTER_0(HW_CLKCTRL_IR, REGS_CLKCTRL_BASE, 0x000000b0) +#define HW_CLKCTRL_IR_ADDR (REGS_CLKCTRL_BASE + 0x000000b0) +#define BM_CLKCTRL_IR_CLKGATE 0x80000000 +#define BM_CLKCTRL_IR_AUTO_DIV 0x20000000 +#define BM_CLKCTRL_IR_IR_BUSY 0x10000000 +#define BM_CLKCTRL_IR_IROV_BUSY 0x08000000 +#define BP_CLKCTRL_IR_IROV_DIV 16 +#define BM_CLKCTRL_IR_IROV_DIV 0x01FF0000 +#define BF_CLKCTRL_IR_IROV_DIV(v) \ + (((v) << 16) & BM_CLKCTRL_IR_IROV_DIV) +#define BP_CLKCTRL_IR_IR_DIV 0 +#define BM_CLKCTRL_IR_IR_DIV 0x000003FF +#define BF_CLKCTRL_IR_IR_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_IR_IR_DIV) +HW_REGISTER_0(HW_CLKCTRL_SAIF, REGS_CLKCTRL_BASE, 0x000000c0) +#define HW_CLKCTRL_SAIF_ADDR (REGS_CLKCTRL_BASE + 0x000000c0) +#define BM_CLKCTRL_SAIF_CLKGATE 0x80000000 +#define BM_CLKCTRL_SAIF_BUSY 0x20000000 +#define BM_CLKCTRL_SAIF_DIV_FRAC_EN 0x00010000 +#define BP_CLKCTRL_SAIF_DIV 0 +#define BM_CLKCTRL_SAIF_DIV 0x0000FFFF +#define BF_CLKCTRL_SAIF_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_SAIF_DIV) +HW_REGISTER_0(HW_CLKCTRL_TV, REGS_CLKCTRL_BASE, 0x000000d0) +#define HW_CLKCTRL_TV_ADDR (REGS_CLKCTRL_BASE + 0x000000d0) +#define BM_CLKCTRL_TV_CLK_TV108M_GATE 0x80000000 +#define BM_CLKCTRL_TV_CLK_TV_GATE 0x40000000 +HW_REGISTER_0(HW_CLKCTRL_ETM, REGS_CLKCTRL_BASE, 0x000000e0) +#define HW_CLKCTRL_ETM_ADDR (REGS_CLKCTRL_BASE + 0x000000e0) +#define BM_CLKCTRL_ETM_CLKGATE 0x80000000 +#define BM_CLKCTRL_ETM_BUSY 0x20000000 +#define BM_CLKCTRL_ETM_DIV_FRAC_EN 0x00000040 +#define BP_CLKCTRL_ETM_DIV 0 +#define BM_CLKCTRL_ETM_DIV 0x0000003F +#define BF_CLKCTRL_ETM_DIV(v) \ + (((v) << 0) & BM_CLKCTRL_ETM_DIV) +HW_REGISTER(HW_CLKCTRL_FRAC, REGS_CLKCTRL_BASE, 0x000000f0) +#define HW_CLKCTRL_FRAC_ADDR (REGS_CLKCTRL_BASE + 0x000000f0) +#define BM_CLKCTRL_FRAC_CLKGATEIO 0x80000000 +#define BM_CLKCTRL_FRAC_IO_STABLE 0x40000000 +#define BP_CLKCTRL_FRAC_IOFRAC 24 +#define BM_CLKCTRL_FRAC_IOFRAC 0x3F000000 +#define BF_CLKCTRL_FRAC_IOFRAC(v) \ + (((v) << 24) & BM_CLKCTRL_FRAC_IOFRAC) +#define BM_CLKCTRL_FRAC_CLKGATEPIX 0x00800000 +#define BM_CLKCTRL_FRAC_PIX_STABLE 0x00400000 +#define BP_CLKCTRL_FRAC_PIXFRAC 16 +#define BM_CLKCTRL_FRAC_PIXFRAC 0x003F0000 +#define BF_CLKCTRL_FRAC_PIXFRAC(v) \ + (((v) << 16) & BM_CLKCTRL_FRAC_PIXFRAC) +#define BM_CLKCTRL_FRAC_CLKGATEEMI 0x00008000 +#define BM_CLKCTRL_FRAC_EMI_STABLE 0x00004000 +#define BP_CLKCTRL_FRAC_EMIFRAC 8 +#define BM_CLKCTRL_FRAC_EMIFRAC 0x00003F00 +#define BF_CLKCTRL_FRAC_EMIFRAC(v) \ + (((v) << 8) & BM_CLKCTRL_FRAC_EMIFRAC) +#define BM_CLKCTRL_FRAC_CLKGATECPU 0x00000080 +#define BM_CLKCTRL_FRAC_CPU_STABLE 0x00000040 +#define BP_CLKCTRL_FRAC_CPUFRAC 0 +#define BM_CLKCTRL_FRAC_CPUFRAC 0x0000003F +#define BF_CLKCTRL_FRAC_CPUFRAC(v) \ + (((v) << 0) & BM_CLKCTRL_FRAC_CPUFRAC) +HW_REGISTER(HW_CLKCTRL_FRAC1, REGS_CLKCTRL_BASE, 0x00000100) +#define HW_CLKCTRL_FRAC1_ADDR (REGS_CLKCTRL_BASE + 0x00000100) +#define BM_CLKCTRL_FRAC1_CLKGATEVID 0x80000000 +#define BM_CLKCTRL_FRAC1_VID_STABLE 0x40000000 +HW_REGISTER(HW_CLKCTRL_CLKSEQ, REGS_CLKCTRL_BASE, 0x00000110) +#define HW_CLKCTRL_CLKSEQ_ADDR (REGS_CLKCTRL_BASE + 0x00000110) +#define BM_CLKCTRL_CLKSEQ_BYPASS_ETM 0x00000100 +#define BM_CLKCTRL_CLKSEQ_BYPASS_CPU 0x00000080 +#define BM_CLKCTRL_CLKSEQ_BYPASS_EMI 0x00000040 +#define BM_CLKCTRL_CLKSEQ_BYPASS_SSP 0x00000020 +#define BM_CLKCTRL_CLKSEQ_BYPASS_GPMI 0x00000010 +#define BM_CLKCTRL_CLKSEQ_BYPASS_IR 0x00000008 +#define BM_CLKCTRL_CLKSEQ_BYPASS_PIX 0x00000002 +#define BM_CLKCTRL_CLKSEQ_BYPASS_SAIF 0x00000001 +HW_REGISTER_0(HW_CLKCTRL_RESET, REGS_CLKCTRL_BASE, 0x00000120) +#define HW_CLKCTRL_RESET_ADDR (REGS_CLKCTRL_BASE + 0x00000120) +#define BM_CLKCTRL_RESET_CHIP 0x00000002 +#define BM_CLKCTRL_RESET_DIG 0x00000001 +HW_REGISTER_0(HW_CLKCTRL_STATUS, REGS_CLKCTRL_BASE, 0x00000130) +#define HW_CLKCTRL_STATUS_ADDR (REGS_CLKCTRL_BASE + 0x00000130) +#define BP_CLKCTRL_STATUS_CPU_LIMIT 30 +#define BM_CLKCTRL_STATUS_CPU_LIMIT 0xC0000000 +#define BF_CLKCTRL_STATUS_CPU_LIMIT(v) \ + (((v) << 30) & BM_CLKCTRL_STATUS_CPU_LIMIT) +HW_REGISTER_0(HW_CLKCTRL_VERSION, REGS_CLKCTRL_BASE, 0x00000140) +#define HW_CLKCTRL_VERSION_ADDR (REGS_CLKCTRL_BASE + 0x00000140) +#define BP_CLKCTRL_VERSION_MAJOR 24 +#define BM_CLKCTRL_VERSION_MAJOR 0xFF000000 +#define BF_CLKCTRL_VERSION_MAJOR(v) \ + (((v) << 24) & BM_CLKCTRL_VERSION_MAJOR) +#define BP_CLKCTRL_VERSION_MINOR 16 +#define BM_CLKCTRL_VERSION_MINOR 0x00FF0000 +#define BF_CLKCTRL_VERSION_MINOR(v) \ + (((v) << 16) & BM_CLKCTRL_VERSION_MINOR) +#define BP_CLKCTRL_VERSION_STEP 0 +#define BM_CLKCTRL_VERSION_STEP 0x0000FFFF +#define BF_CLKCTRL_VERSION_STEP(v) \ + (((v) << 0) & BM_CLKCTRL_VERSION_STEP) +#endif /* __ARCH_ARM___CLKCTRL_H */ diff --git a/arch/arm/mach-stmp378x/include/mach/regs-icoll.h b/arch/arm/mach-stmp378x/include/mach/regs-icoll.h new file mode 100644 index 0000000..a5a530c --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/regs-icoll.h @@ -0,0 +1,213 @@ +/* + * STMP ICOLL Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ARCH_ARM___ICOLL_H +#define __ARCH_ARM___ICOLL_H 1 + +#include + +#define REGS_ICOLL_BASE (REGS_BASE + 0x0) +#define REGS_ICOLL_BASE_PHYS (0x80000000) +#define REGS_ICOLL_SIZE 0x00002000 +HW_REGISTER(HW_ICOLL_VECTOR, REGS_ICOLL_BASE, 0x00000000) +#define HW_ICOLL_VECTOR_ADDR (REGS_ICOLL_BASE + 0x00000000) +#define BP_ICOLL_VECTOR_IRQVECTOR 2 +#define BM_ICOLL_VECTOR_IRQVECTOR 0xFFFFFFFC +#define BF_ICOLL_VECTOR_IRQVECTOR(v) \ + (((v) << 2) & BM_ICOLL_VECTOR_IRQVECTOR) +HW_REGISTER_0(HW_ICOLL_LEVELACK, REGS_ICOLL_BASE, 0x00000010) +#define HW_ICOLL_LEVELACK_ADDR (REGS_ICOLL_BASE + 0x00000010) +#define BP_ICOLL_LEVELACK_IRQLEVELACK 0 +#define BM_ICOLL_LEVELACK_IRQLEVELACK 0x0000000F +#define BF_ICOLL_LEVELACK_IRQLEVELACK(v) \ + (((v) << 0) & BM_ICOLL_LEVELACK_IRQLEVELACK) +#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0 0x1 +#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL1 0x2 +#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL2 0x4 +#define BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL3 0x8 +HW_REGISTER(HW_ICOLL_CTRL, REGS_ICOLL_BASE, 0x00000020) +#define HW_ICOLL_CTRL_ADDR (REGS_ICOLL_BASE + 0x00000020) +#define BM_ICOLL_CTRL_SFTRST 0x80000000 +#define BV_ICOLL_CTRL_SFTRST__RUN 0x0 +#define BV_ICOLL_CTRL_SFTRST__IN_RESET 0x1 +#define BM_ICOLL_CTRL_CLKGATE 0x40000000 +#define BV_ICOLL_CTRL_CLKGATE__RUN 0x0 +#define BV_ICOLL_CTRL_CLKGATE__NO_CLOCKS 0x1 +#define BP_ICOLL_CTRL_VECTOR_PITCH 21 +#define BM_ICOLL_CTRL_VECTOR_PITCH 0x00E00000 +#define BF_ICOLL_CTRL_VECTOR_PITCH(v) \ + (((v) << 21) & BM_ICOLL_CTRL_VECTOR_PITCH) +#define BV_ICOLL_CTRL_VECTOR_PITCH__DEFAULT_BY4 0x0 +#define BV_ICOLL_CTRL_VECTOR_PITCH__BY4 0x1 +#define BV_ICOLL_CTRL_VECTOR_PITCH__BY8 0x2 +#define BV_ICOLL_CTRL_VECTOR_PITCH__BY12 0x3 +#define BV_ICOLL_CTRL_VECTOR_PITCH__BY16 0x4 +#define BV_ICOLL_CTRL_VECTOR_PITCH__BY20 0x5 +#define BV_ICOLL_CTRL_VECTOR_PITCH__BY24 0x6 +#define BV_ICOLL_CTRL_VECTOR_PITCH__BY28 0x7 +#define BM_ICOLL_CTRL_BYPASS_FSM 0x00100000 +#define BV_ICOLL_CTRL_BYPASS_FSM__NORMAL 0x0 +#define BV_ICOLL_CTRL_BYPASS_FSM__BYPASS 0x1 +#define BM_ICOLL_CTRL_NO_NESTING 0x00080000 +#define BV_ICOLL_CTRL_NO_NESTING__NORMAL 0x0 +#define BV_ICOLL_CTRL_NO_NESTING__NO_NEST 0x1 +#define BM_ICOLL_CTRL_ARM_RSE_MODE 0x00040000 +#define BM_ICOLL_CTRL_FIQ_FINAL_ENABLE 0x00020000 +#define BV_ICOLL_CTRL_FIQ_FINAL_ENABLE__DISABLE 0x0 +#define BV_ICOLL_CTRL_FIQ_FINAL_ENABLE__ENABLE 0x1 +#define BM_ICOLL_CTRL_IRQ_FINAL_ENABLE 0x00010000 +#define BV_ICOLL_CTRL_IRQ_FINAL_ENABLE__DISABLE 0x0 +#define BV_ICOLL_CTRL_IRQ_FINAL_ENABLE__ENABLE 0x1 +HW_REGISTER(HW_ICOLL_VBASE, REGS_ICOLL_BASE, 0x00000040) +#define HW_ICOLL_VBASE_ADDR (REGS_ICOLL_BASE + 0x00000040) +#define BP_ICOLL_VBASE_TABLE_ADDRESS 2 +#define BM_ICOLL_VBASE_TABLE_ADDRESS 0xFFFFFFFC +#define BF_ICOLL_VBASE_TABLE_ADDRESS(v) \ + (((v) << 2) & BM_ICOLL_VBASE_TABLE_ADDRESS) +HW_REGISTER_0(HW_ICOLL_STAT, REGS_ICOLL_BASE, 0x00000070) +#define HW_ICOLL_STAT_ADDR (REGS_ICOLL_BASE + 0x00000070) +#define BP_ICOLL_STAT_VECTOR_NUMBER 0 +#define BM_ICOLL_STAT_VECTOR_NUMBER 0x0000007F +#define BF_ICOLL_STAT_VECTOR_NUMBER(v) \ + (((v) << 0) & BM_ICOLL_STAT_VECTOR_NUMBER) +/* + * multi-register-define name HW_ICOLL_RAWn + * base 0x000000A0 + * count 4 + * offset 0x10 + */ +HW_REGISTER_0_INDEXED(HW_ICOLL_RAWn, REGS_ICOLL_BASE, 0x000000a0, 0x10) +#define BP_ICOLL_RAWn_RAW_IRQS 0 +#define BM_ICOLL_RAWn_RAW_IRQS 0xFFFFFFFF +#define BF_ICOLL_RAWn_RAW_IRQS(v) (v) +/* + * multi-register-define name HW_ICOLL_INTERRUPTn + * base 0x00000120 + * count 128 + * offset 0x10 + */ +HW_REGISTER_INDEXED(HW_ICOLL_INTERRUPTn, REGS_ICOLL_BASE, 0x00000120, 0x10) +#define BM_ICOLL_INTERRUPTn_ENFIQ 0x00000010 +#define BV_ICOLL_INTERRUPTn_ENFIQ__DISABLE 0x0 +#define BV_ICOLL_INTERRUPTn_ENFIQ__ENABLE 0x1 +#define BM_ICOLL_INTERRUPTn_SOFTIRQ 0x00000008 +#define BV_ICOLL_INTERRUPTn_SOFTIRQ__NO_INTERRUPT 0x0 +#define BV_ICOLL_INTERRUPTn_SOFTIRQ__FORCE_INTERRUPT 0x1 +#define BM_ICOLL_INTERRUPTn_ENABLE 0x00000004 +#define BV_ICOLL_INTERRUPTn_ENABLE__DISABLE 0x0 +#define BV_ICOLL_INTERRUPTn_ENABLE__ENABLE 0x1 +#define BP_ICOLL_INTERRUPTn_PRIORITY 0 +#define BM_ICOLL_INTERRUPTn_PRIORITY 0x00000003 +#define BF_ICOLL_INTERRUPTn_PRIORITY(v) \ + (((v) << 0) & BM_ICOLL_INTERRUPTn_PRIORITY) +#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL0 0x0 +#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL1 0x1 +#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL2 0x2 +#define BV_ICOLL_INTERRUPTn_PRIORITY__LEVEL3 0x3 +HW_REGISTER(HW_ICOLL_DEBUG, REGS_ICOLL_BASE, 0x00001120) +#define HW_ICOLL_DEBUG_ADDR (REGS_ICOLL_BASE + 0x00001120) +#define BP_ICOLL_DEBUG_INSERVICE 28 +#define BM_ICOLL_DEBUG_INSERVICE 0xF0000000 +#define BF_ICOLL_DEBUG_INSERVICE(v) \ + (((v) << 28) & BM_ICOLL_DEBUG_INSERVICE) +#define BV_ICOLL_DEBUG_INSERVICE__LEVEL0 0x1 +#define BV_ICOLL_DEBUG_INSERVICE__LEVEL1 0x2 +#define BV_ICOLL_DEBUG_INSERVICE__LEVEL2 0x4 +#define BV_ICOLL_DEBUG_INSERVICE__LEVEL3 0x8 +#define BP_ICOLL_DEBUG_LEVEL_REQUESTS 24 +#define BM_ICOLL_DEBUG_LEVEL_REQUESTS 0x0F000000 +#define BF_ICOLL_DEBUG_LEVEL_REQUESTS(v) \ + (((v) << 24) & BM_ICOLL_DEBUG_LEVEL_REQUESTS) +#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL0 0x1 +#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL1 0x2 +#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL2 0x4 +#define BV_ICOLL_DEBUG_LEVEL_REQUESTS__LEVEL3 0x8 +#define BP_ICOLL_DEBUG_REQUESTS_BY_LEVEL 20 +#define BM_ICOLL_DEBUG_REQUESTS_BY_LEVEL 0x00F00000 +#define BF_ICOLL_DEBUG_REQUESTS_BY_LEVEL(v) \ + (((v) << 20) & BM_ICOLL_DEBUG_REQUESTS_BY_LEVEL) +#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL0 0x1 +#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL1 0x2 +#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL2 0x4 +#define BV_ICOLL_DEBUG_REQUESTS_BY_LEVEL__LEVEL3 0x8 +#define BM_ICOLL_DEBUG_FIQ 0x00020000 +#define BV_ICOLL_DEBUG_FIQ__NO_FIQ_REQUESTED 0x0 +#define BV_ICOLL_DEBUG_FIQ__FIQ_REQUESTED 0x1 +#define BM_ICOLL_DEBUG_IRQ 0x00010000 +#define BV_ICOLL_DEBUG_IRQ__NO_IRQ_REQUESTED 0x0 +#define BV_ICOLL_DEBUG_IRQ__IRQ_REQUESTED 0x1 +#define BP_ICOLL_DEBUG_VECTOR_FSM 0 +#define BM_ICOLL_DEBUG_VECTOR_FSM 0x000003FF +#define BF_ICOLL_DEBUG_VECTOR_FSM(v) \ + (((v) << 0) & BM_ICOLL_DEBUG_VECTOR_FSM) +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_IDLE 0x000 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE1 0x001 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE2 0x002 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_PENDING 0x004 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE3 0x008 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE4 0x010 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING1 0x020 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING2 0x040 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_ISR_RUNNING3 0x080 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE5 0x100 +#define BV_ICOLL_DEBUG_VECTOR_FSM__FSM_MULTICYCLE6 0x200 +HW_REGISTER(HW_ICOLL_DBGREAD0, REGS_ICOLL_BASE, 0x00001130) +#define HW_ICOLL_DBGREAD0_ADDR (REGS_ICOLL_BASE + 0x00001130) +#define BP_ICOLL_DBGREAD0_VALUE 0 +#define BM_ICOLL_DBGREAD0_VALUE 0xFFFFFFFF +#define BF_ICOLL_DBGREAD0_VALUE(v) (v) +HW_REGISTER(HW_ICOLL_DBGREAD1, REGS_ICOLL_BASE, 0x00001140) +#define HW_ICOLL_DBGREAD1_ADDR (REGS_ICOLL_BASE + 0x00001140) +#define BP_ICOLL_DBGREAD1_VALUE 0 +#define BM_ICOLL_DBGREAD1_VALUE 0xFFFFFFFF +#define BF_ICOLL_DBGREAD1_VALUE(v) (v) +HW_REGISTER(HW_ICOLL_DBGFLAG, REGS_ICOLL_BASE, 0x00001150) +#define HW_ICOLL_DBGFLAG_ADDR (REGS_ICOLL_BASE + 0x00001150) +#define BP_ICOLL_DBGFLAG_FLAG 0 +#define BM_ICOLL_DBGFLAG_FLAG 0x0000FFFF +#define BF_ICOLL_DBGFLAG_FLAG(v) \ + (((v) << 0) & BM_ICOLL_DBGFLAG_FLAG) +/* + * multi-register-define name HW_ICOLL_DBGREQUESTn + * base 0x00001160 + * count 4 + * offset 0x10 + */ +HW_REGISTER_0_INDEXED(HW_ICOLL_DBGREQUESTn, REGS_ICOLL_BASE, 0x00001160, + 0x10) +#define BP_ICOLL_DBGREQUESTn_BITS 0 +#define BM_ICOLL_DBGREQUESTn_BITS 0xFFFFFFFF +#define BF_ICOLL_DBGREQUESTn_BITS(v) (v) +HW_REGISTER_0(HW_ICOLL_VERSION, REGS_ICOLL_BASE, 0x000011e0) +#define HW_ICOLL_VERSION_ADDR (REGS_ICOLL_BASE + 0x000011e0) +#define BP_ICOLL_VERSION_MAJOR 24 +#define BM_ICOLL_VERSION_MAJOR 0xFF000000 +#define BF_ICOLL_VERSION_MAJOR(v) \ + (((v) << 24) & BM_ICOLL_VERSION_MAJOR) +#define BP_ICOLL_VERSION_MINOR 16 +#define BM_ICOLL_VERSION_MINOR 0x00FF0000 +#define BF_ICOLL_VERSION_MINOR(v) \ + (((v) << 16) & BM_ICOLL_VERSION_MINOR) +#define BP_ICOLL_VERSION_STEP 0 +#define BM_ICOLL_VERSION_STEP 0x0000FFFF +#define BF_ICOLL_VERSION_STEP(v) \ + (((v) << 0) & BM_ICOLL_VERSION_STEP) +#endif /* __ARCH_ARM___ICOLL_H */ diff --git a/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h b/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h new file mode 100644 index 0000000..6c42d2a --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/regs-pinctrl.h @@ -0,0 +1,143 @@ +/* + * STMP PINCTRL Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ARCH_ARM___PINCTRL_H +#define __ARCH_ARM___PINCTRL_H 1 + +#include + +#define REGS_PINCTRL_BASE (REGS_BASE + 0x18000) +#define REGS_PINCTRL_BASE_PHYS (0x80018000) +#define REGS_PINCTRL_SIZE 0x00002000 +HW_REGISTER(HW_PINCTRL_CTRL, REGS_PINCTRL_BASE, 0x00000000) +#define HW_PINCTRL_CTRL_ADDR (REGS_PINCTRL_BASE + 0x00000000) +#define BM_PINCTRL_CTRL_SFTRST 0x80000000 +#define BM_PINCTRL_CTRL_CLKGATE 0x40000000 +#define BM_PINCTRL_CTRL_PRESENT3 0x08000000 +#define BM_PINCTRL_CTRL_PRESENT2 0x04000000 +#define BM_PINCTRL_CTRL_PRESENT1 0x02000000 +#define BM_PINCTRL_CTRL_PRESENT0 0x01000000 +#define BM_PINCTRL_CTRL_IRQOUT2 0x00000004 +#define BM_PINCTRL_CTRL_IRQOUT1 0x00000002 +#define BM_PINCTRL_CTRL_IRQOUT0 0x00000001 +HW_REGISTER(HW_PINCTRL_MUXSEL0, REGS_PINCTRL_BASE, 0x00000100) +#define HW_PINCTRL_MUXSEL0_ADDR (REGS_PINCTRL_BASE + 0x00000100) +HW_REGISTER(HW_PINCTRL_MUXSEL1, REGS_PINCTRL_BASE, 0x00000110) +#define HW_PINCTRL_MUXSEL1_ADDR (REGS_PINCTRL_BASE + 0x00000110) +HW_REGISTER(HW_PINCTRL_MUXSEL2, REGS_PINCTRL_BASE, 0x00000120) +#define HW_PINCTRL_MUXSEL2_ADDR (REGS_PINCTRL_BASE + 0x00000120) +HW_REGISTER(HW_PINCTRL_MUXSEL3, REGS_PINCTRL_BASE, 0x00000130) +#define HW_PINCTRL_MUXSEL3_ADDR (REGS_PINCTRL_BASE + 0x00000130) +HW_REGISTER(HW_PINCTRL_MUXSEL4, REGS_PINCTRL_BASE, 0x00000140) +#define HW_PINCTRL_MUXSEL4_ADDR (REGS_PINCTRL_BASE + 0x00000140) +HW_REGISTER(HW_PINCTRL_MUXSEL5, REGS_PINCTRL_BASE, 0x00000150) +#define HW_PINCTRL_MUXSEL5_ADDR (REGS_PINCTRL_BASE + 0x00000150) +HW_REGISTER(HW_PINCTRL_MUXSEL6, REGS_PINCTRL_BASE, 0x00000160) +#define HW_PINCTRL_MUXSEL6_ADDR (REGS_PINCTRL_BASE + 0x00000160) +HW_REGISTER(HW_PINCTRL_MUXSEL7, REGS_PINCTRL_BASE, 0x00000170) +#define HW_PINCTRL_MUXSEL7_ADDR (REGS_PINCTRL_BASE + 0x00000170) +HW_REGISTER(HW_PINCTRL_DRIVE0, REGS_PINCTRL_BASE, 0x00000200) +#define HW_PINCTRL_DRIVE0_ADDR (REGS_PINCTRL_BASE + 0x00000200) +HW_REGISTER(HW_PINCTRL_DRIVE1, REGS_PINCTRL_BASE, 0x00000210) +#define HW_PINCTRL_DRIVE1_ADDR (REGS_PINCTRL_BASE + 0x00000210) +HW_REGISTER(HW_PINCTRL_DRIVE2, REGS_PINCTRL_BASE, 0x00000220) +#define HW_PINCTRL_DRIVE2_ADDR (REGS_PINCTRL_BASE + 0x00000220) +HW_REGISTER(HW_PINCTRL_DRIVE3, REGS_PINCTRL_BASE, 0x00000230) +#define HW_PINCTRL_DRIVE3_ADDR (REGS_PINCTRL_BASE + 0x00000230) +HW_REGISTER(HW_PINCTRL_DRIVE4, REGS_PINCTRL_BASE, 0x00000240) +#define HW_PINCTRL_DRIVE4_ADDR (REGS_PINCTRL_BASE + 0x00000240) +HW_REGISTER(HW_PINCTRL_DRIVE5, REGS_PINCTRL_BASE, 0x00000250) +#define HW_PINCTRL_DRIVE5_ADDR (REGS_PINCTRL_BASE + 0x00000250) +HW_REGISTER(HW_PINCTRL_DRIVE6, REGS_PINCTRL_BASE, 0x00000260) +#define HW_PINCTRL_DRIVE6_ADDR (REGS_PINCTRL_BASE + 0x00000260) +HW_REGISTER(HW_PINCTRL_DRIVE7, REGS_PINCTRL_BASE, 0x00000270) +#define HW_PINCTRL_DRIVE7_ADDR (REGS_PINCTRL_BASE + 0x00000270) +HW_REGISTER(HW_PINCTRL_DRIVE8, REGS_PINCTRL_BASE, 0x00000280) +#define HW_PINCTRL_DRIVE8_ADDR (REGS_PINCTRL_BASE + 0x00000280) +HW_REGISTER(HW_PINCTRL_DRIVE9, REGS_PINCTRL_BASE, 0x00000290) +#define HW_PINCTRL_DRIVE9_ADDR (REGS_PINCTRL_BASE + 0x00000290) +HW_REGISTER(HW_PINCTRL_DRIVE10, REGS_PINCTRL_BASE, 0x000002a0) +#define HW_PINCTRL_DRIVE10_ADDR (REGS_PINCTRL_BASE + 0x000002a0) +HW_REGISTER(HW_PINCTRL_DRIVE11, REGS_PINCTRL_BASE, 0x000002b0) +#define HW_PINCTRL_DRIVE11_ADDR (REGS_PINCTRL_BASE + 0x000002b0) +HW_REGISTER(HW_PINCTRL_DRIVE12, REGS_PINCTRL_BASE, 0x000002c0) +#define HW_PINCTRL_DRIVE12_ADDR (REGS_PINCTRL_BASE + 0x000002c0) +HW_REGISTER(HW_PINCTRL_DRIVE13, REGS_PINCTRL_BASE, 0x000002d0) +#define HW_PINCTRL_DRIVE13_ADDR (REGS_PINCTRL_BASE + 0x000002d0) +HW_REGISTER(HW_PINCTRL_DRIVE14, REGS_PINCTRL_BASE, 0x000002e0) +#define HW_PINCTRL_DRIVE14_ADDR (REGS_PINCTRL_BASE + 0x000002e0) +HW_REGISTER(HW_PINCTRL_PULL0, REGS_PINCTRL_BASE, 0x00000400) +#define HW_PINCTRL_PULL0_ADDR (REGS_PINCTRL_BASE + 0x00000400) +HW_REGISTER(HW_PINCTRL_PULL1, REGS_PINCTRL_BASE, 0x00000410) +#define HW_PINCTRL_PULL1_ADDR (REGS_PINCTRL_BASE + 0x00000410) +HW_REGISTER(HW_PINCTRL_PULL2, REGS_PINCTRL_BASE, 0x00000420) +#define HW_PINCTRL_PULL2_ADDR (REGS_PINCTRL_BASE + 0x00000420) +HW_REGISTER(HW_PINCTRL_PULL3, REGS_PINCTRL_BASE, 0x00000430) +#define HW_PINCTRL_PULL3_ADDR (REGS_PINCTRL_BASE + 0x00000430) +HW_REGISTER(HW_PINCTRL_DOUT0, REGS_PINCTRL_BASE, 0x00000500) +#define HW_PINCTRL_DOUT0_ADDR (REGS_PINCTRL_BASE + 0x00000500) +HW_REGISTER(HW_PINCTRL_DOUT1, REGS_PINCTRL_BASE, 0x00000510) +#define HW_PINCTRL_DOUT1_ADDR (REGS_PINCTRL_BASE + 0x00000510) +HW_REGISTER(HW_PINCTRL_DOUT2, REGS_PINCTRL_BASE, 0x00000520) +#define HW_PINCTRL_DOUT2_ADDR (REGS_PINCTRL_BASE + 0x00000520) +HW_REGISTER(HW_PINCTRL_DIN0, REGS_PINCTRL_BASE, 0x00000600) +#define HW_PINCTRL_DIN0_ADDR (REGS_PINCTRL_BASE + 0x00000600) +HW_REGISTER(HW_PINCTRL_DIN1, REGS_PINCTRL_BASE, 0x00000610) +#define HW_PINCTRL_DIN1_ADDR (REGS_PINCTRL_BASE + 0x00000610) +HW_REGISTER(HW_PINCTRL_DIN2, REGS_PINCTRL_BASE, 0x00000620) +#define HW_PINCTRL_DIN2_ADDR (REGS_PINCTRL_BASE + 0x00000620) +HW_REGISTER(HW_PINCTRL_DOE0, REGS_PINCTRL_BASE, 0x00000700) +#define HW_PINCTRL_DOE0_ADDR (REGS_PINCTRL_BASE + 0x00000700) +HW_REGISTER(HW_PINCTRL_DOE1, REGS_PINCTRL_BASE, 0x00000710) +#define HW_PINCTRL_DOE1_ADDR (REGS_PINCTRL_BASE + 0x00000710) +HW_REGISTER(HW_PINCTRL_DOE2, REGS_PINCTRL_BASE, 0x00000720) +#define HW_PINCTRL_DOE2_ADDR (REGS_PINCTRL_BASE + 0x00000720) +HW_REGISTER(HW_PINCTRL_PIN2IRQ0, REGS_PINCTRL_BASE, 0x00000800) +#define HW_PINCTRL_PIN2IRQ0_ADDR (REGS_PINCTRL_BASE + 0x00000800) +HW_REGISTER(HW_PINCTRL_PIN2IRQ1, REGS_PINCTRL_BASE, 0x00000810) +#define HW_PINCTRL_PIN2IRQ1_ADDR (REGS_PINCTRL_BASE + 0x00000810) +HW_REGISTER(HW_PINCTRL_PIN2IRQ2, REGS_PINCTRL_BASE, 0x00000820) +#define HW_PINCTRL_PIN2IRQ2_ADDR (REGS_PINCTRL_BASE + 0x00000820) +HW_REGISTER(HW_PINCTRL_IRQEN0, REGS_PINCTRL_BASE, 0x00000900) +#define HW_PINCTRL_IRQEN0_ADDR (REGS_PINCTRL_BASE + 0x00000900) +HW_REGISTER(HW_PINCTRL_IRQEN1, REGS_PINCTRL_BASE, 0x00000910) +#define HW_PINCTRL_IRQEN1_ADDR (REGS_PINCTRL_BASE + 0x00000910) +HW_REGISTER(HW_PINCTRL_IRQEN2, REGS_PINCTRL_BASE, 0x00000920) +#define HW_PINCTRL_IRQEN2_ADDR (REGS_PINCTRL_BASE + 0x00000920) +HW_REGISTER(HW_PINCTRL_IRQLEVEL0, REGS_PINCTRL_BASE, 0x00000a00) +#define HW_PINCTRL_IRQLEVEL0_ADDR (REGS_PINCTRL_BASE + 0x00000a00) +HW_REGISTER(HW_PINCTRL_IRQLEVEL1, REGS_PINCTRL_BASE, 0x00000a10) +#define HW_PINCTRL_IRQLEVEL1_ADDR (REGS_PINCTRL_BASE + 0x00000a10) +HW_REGISTER(HW_PINCTRL_IRQLEVEL2, REGS_PINCTRL_BASE, 0x00000a20) +#define HW_PINCTRL_IRQLEVEL2_ADDR (REGS_PINCTRL_BASE + 0x00000a20) +HW_REGISTER(HW_PINCTRL_IRQPOL0, REGS_PINCTRL_BASE, 0x00000b00) +#define HW_PINCTRL_IRQPOL0_ADDR (REGS_PINCTRL_BASE + 0x00000b00) +HW_REGISTER(HW_PINCTRL_IRQPOL1, REGS_PINCTRL_BASE, 0x00000b10) +#define HW_PINCTRL_IRQPOL1_ADDR (REGS_PINCTRL_BASE + 0x00000b10) +HW_REGISTER(HW_PINCTRL_IRQPOL2, REGS_PINCTRL_BASE, 0x00000b20) +#define HW_PINCTRL_IRQPOL2_ADDR (REGS_PINCTRL_BASE + 0x00000b20) +HW_REGISTER(HW_PINCTRL_IRQSTAT0, REGS_PINCTRL_BASE, 0x00000c00) +#define HW_PINCTRL_IRQSTAT0_ADDR (REGS_PINCTRL_BASE + 0x00000c00) +HW_REGISTER(HW_PINCTRL_IRQSTAT1, REGS_PINCTRL_BASE, 0x00000c10) +#define HW_PINCTRL_IRQSTAT1_ADDR (REGS_PINCTRL_BASE + 0x00000c10) +HW_REGISTER(HW_PINCTRL_IRQSTAT2, REGS_PINCTRL_BASE, 0x00000c20) +#define HW_PINCTRL_IRQSTAT2_ADDR (REGS_PINCTRL_BASE + 0x00000c20) +#endif /* __ARCH_ARM___PINCTRL_H */ diff --git a/arch/arm/mach-stmp378x/include/mach/regs-power.h b/arch/arm/mach-stmp378x/include/mach/regs-power.h new file mode 100644 index 0000000..1c81afe --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/regs-power.h @@ -0,0 +1,32 @@ +/* + * STMP POWER Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ARCH_ARM___POWER_H +#define __ARCH_ARM___POWER_H 1 + +#include + +#define REGS_POWER_BASE (void __iomem *)(REGS_BASE + 0x44000) +#define REGS_POWER_BASE_PHYS (0x80044000) +#define REGS_POWER_SIZE 0x00002000 +HW_REGISTER(HW_POWER_MINPWR, REGS_POWER_BASE, 0x00000020) +HW_REGISTER(HW_POWER_CHARGE, REGS_POWER_BASE, 0x00000030) +#endif /* __ARCH_ARM___POWER_H */ diff --git a/arch/arm/mach-stmp378x/include/mach/regs-timrot.h b/arch/arm/mach-stmp378x/include/mach/regs-timrot.h new file mode 100644 index 0000000..bb6355a --- /dev/null +++ b/arch/arm/mach-stmp378x/include/mach/regs-timrot.h @@ -0,0 +1,216 @@ +/* + * STMP TIMROT Register Definitions + * + * Copyright (c) 2008 Freescale Semiconductor + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ARCH_ARM___TIMROT_H +#define __ARCH_ARM___TIMROT_H 1 + +#include + +#define REGS_TIMROT_BASE (REGS_BASE + 0x68000) +#define REGS_TIMROT_BASE_PHYS (0x80068000) +#define REGS_TIMROT_SIZE 0x00002000 +HW_REGISTER(HW_TIMROT_ROTCTRL, REGS_TIMROT_BASE, 0x00000000) +#define HW_TIMROT_ROTCTRL_ADDR (REGS_TIMROT_BASE + 0x00000000) +#define BM_TIMROT_ROTCTRL_SFTRST 0x80000000 +#define BM_TIMROT_ROTCTRL_CLKGATE 0x40000000 +#define BM_TIMROT_ROTCTRL_ROTARY_PRESENT 0x20000000 +#define BM_TIMROT_ROTCTRL_TIM3_PRESENT 0x10000000 +#define BM_TIMROT_ROTCTRL_TIM2_PRESENT 0x08000000 +#define BM_TIMROT_ROTCTRL_TIM1_PRESENT 0x04000000 +#define BM_TIMROT_ROTCTRL_TIM0_PRESENT 0x02000000 +#define BP_TIMROT_ROTCTRL_STATE 22 +#define BM_TIMROT_ROTCTRL_STATE 0x01C00000 +#define BF_TIMROT_ROTCTRL_STATE(v) \ + (((v) << 22) & BM_TIMROT_ROTCTRL_STATE) +#define BP_TIMROT_ROTCTRL_DIVIDER 16 +#define BM_TIMROT_ROTCTRL_DIVIDER 0x003F0000 +#define BF_TIMROT_ROTCTRL_DIVIDER(v) \ + (((v) << 16) & BM_TIMROT_ROTCTRL_DIVIDER) +#define BM_TIMROT_ROTCTRL_RELATIVE 0x00001000 +#define BP_TIMROT_ROTCTRL_OVERSAMPLE 10 +#define BM_TIMROT_ROTCTRL_OVERSAMPLE 0x00000C00 +#define BF_TIMROT_ROTCTRL_OVERSAMPLE(v) \ + (((v) << 10) & BM_TIMROT_ROTCTRL_OVERSAMPLE) +#define BV_TIMROT_ROTCTRL_OVERSAMPLE__8X 0x0 +#define BV_TIMROT_ROTCTRL_OVERSAMPLE__4X 0x1 +#define BV_TIMROT_ROTCTRL_OVERSAMPLE__2X 0x2 +#define BV_TIMROT_ROTCTRL_OVERSAMPLE__1X 0x3 +#define BM_TIMROT_ROTCTRL_POLARITY_B 0x00000200 +#define BM_TIMROT_ROTCTRL_POLARITY_A 0x00000100 +#define BP_TIMROT_ROTCTRL_SELECT_B 4 +#define BM_TIMROT_ROTCTRL_SELECT_B 0x00000070 +#define BF_TIMROT_ROTCTRL_SELECT_B(v) \ + (((v) << 4) & BM_TIMROT_ROTCTRL_SELECT_B) +#define BV_TIMROT_ROTCTRL_SELECT_B__NEVER_TICK 0x0 +#define BV_TIMROT_ROTCTRL_SELECT_B__PWM0 0x1 +#define BV_TIMROT_ROTCTRL_SELECT_B__PWM1 0x2 +#define BV_TIMROT_ROTCTRL_SELECT_B__PWM2 0x3 +#define BV_TIMROT_ROTCTRL_SELECT_B__PWM3 0x4 +#define BV_TIMROT_ROTCTRL_SELECT_B__PWM4 0x5 +#define BV_TIMROT_ROTCTRL_SELECT_B__ROTARYA 0x6 +#define BV_TIMROT_ROTCTRL_SELECT_B__ROTARYB 0x7 +#define BP_TIMROT_ROTCTRL_SELECT_A 0 +#define BM_TIMROT_ROTCTRL_SELECT_A 0x00000007 +#define BF_TIMROT_ROTCTRL_SELECT_A(v) \ + (((v) << 0) & BM_TIMROT_ROTCTRL_SELECT_A) +#define BV_TIMROT_ROTCTRL_SELECT_A__NEVER_TICK 0x0 +#define BV_TIMROT_ROTCTRL_SELECT_A__PWM0 0x1 +#define BV_TIMROT_ROTCTRL_SELECT_A__PWM1 0x2 +#define BV_TIMROT_ROTCTRL_SELECT_A__PWM2 0x3 +#define BV_TIMROT_ROTCTRL_SELECT_A__PWM3 0x4 +#define BV_TIMROT_ROTCTRL_SELECT_A__PWM4 0x5 +#define BV_TIMROT_ROTCTRL_SELECT_A__ROTARYA 0x6 +#define BV_TIMROT_ROTCTRL_SELECT_A__ROTARYB 0x7 +HW_REGISTER_0(HW_TIMROT_ROTCOUNT, REGS_TIMROT_BASE, 0x00000010) +#define HW_TIMROT_ROTCOUNT_ADDR (REGS_TIMROT_BASE + 0x00000010) +#define BP_TIMROT_ROTCOUNT_UPDOWN 0 +#define BM_TIMROT_ROTCOUNT_UPDOWN 0x0000FFFF +#define BF_TIMROT_ROTCOUNT_UPDOWN(v) \ + (((v) << 0) & BM_TIMROT_ROTCOUNT_UPDOWN) +/* + * multi-register-define name HW_TIMROT_TIMCTRLn + * base 0x00000020 + * count 3 + * offset 0x20 + */ +HW_REGISTER_INDEXED(HW_TIMROT_TIMCTRLn, REGS_TIMROT_BASE, 0x00000020, 0x20) +#define BM_TIMROT_TIMCTRLn_IRQ 0x00008000 +#define BM_TIMROT_TIMCTRLn_IRQ_EN 0x00004000 +#define BM_TIMROT_TIMCTRLn_POLARITY 0x00000100 +#define BM_TIMROT_TIMCTRLn_UPDATE 0x00000080 +#define BM_TIMROT_TIMCTRLn_RELOAD 0x00000040 +#define BP_TIMROT_TIMCTRLn_PRESCALE 4 +#define BM_TIMROT_TIMCTRLn_PRESCALE 0x00000030 +#define BF_TIMROT_TIMCTRLn_PRESCALE(v) \ + (((v) << 4) & BM_TIMROT_TIMCTRLn_PRESCALE) +#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_1 0x0 +#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_2 0x1 +#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_4 0x2 +#define BV_TIMROT_TIMCTRLn_PRESCALE__DIV_BY_8 0x3 +#define BP_TIMROT_TIMCTRLn_SELECT 0 +#define BM_TIMROT_TIMCTRLn_SELECT 0x0000000F +#define BF_TIMROT_TIMCTRLn_SELECT(v) \ + (((v) << 0) & BM_TIMROT_TIMCTRLn_SELECT) +#define BV_TIMROT_TIMCTRLn_SELECT__NEVER_TICK 0x0 +#define BV_TIMROT_TIMCTRLn_SELECT__PWM0 0x1 +#define BV_TIMROT_TIMCTRLn_SELECT__PWM1 0x2 +#define BV_TIMROT_TIMCTRLn_SELECT__PWM2 0x3 +#define BV_TIMROT_TIMCTRLn_SELECT__PWM3 0x4 +#define BV_TIMROT_TIMCTRLn_SELECT__PWM4 0x5 +#define BV_TIMROT_TIMCTRLn_SELECT__ROTARYA 0x6 +#define BV_TIMROT_TIMCTRLn_SELECT__ROTARYB 0x7 +#define BV_TIMROT_TIMCTRLn_SELECT__32KHZ_XTAL 0x8 +#define BV_TIMROT_TIMCTRLn_SELECT__8KHZ_XTAL 0x9 +#define BV_TIMROT_TIMCTRLn_SELECT__4KHZ_XTAL 0xA +#define BV_TIMROT_TIMCTRLn_SELECT__1KHZ_XTAL 0xB +#define BV_TIMROT_TIMCTRLn_SELECT__TICK_ALWAYS 0xC +/* + * multi-register-define name HW_TIMROT_TIMCOUNTn + * base 0x00000030 + * count 3 + * offset 0x20 + */ +HW_REGISTER_0_INDEXED(HW_TIMROT_TIMCOUNTn, REGS_TIMROT_BASE, 0x00000030, + 0x20) +#define BP_TIMROT_TIMCOUNTn_RUNNING_COUNT 16 +#define BM_TIMROT_TIMCOUNTn_RUNNING_COUNT 0xFFFF0000 +#define BF_TIMROT_TIMCOUNTn_RUNNING_COUNT(v) \ + (((v) << 16) & BM_TIMROT_TIMCOUNTn_RUNNING_COUNT) +#define BP_TIMROT_TIMCOUNTn_FIXED_COUNT 0 +#define BM_TIMROT_TIMCOUNTn_FIXED_COUNT 0x0000FFFF +#define BF_TIMROT_TIMCOUNTn_FIXED_COUNT(v) \ + (((v) << 0) & BM_TIMROT_TIMCOUNTn_FIXED_COUNT) +HW_REGISTER(HW_TIMROT_TIMCTRL3, REGS_TIMROT_BASE, 0x00000080) +#define HW_TIMROT_TIMCTRL3_ADDR (REGS_TIMROT_BASE + 0x00000080) +#define BP_TIMROT_TIMCTRL3_TEST_SIGNAL 16 +#define BM_TIMROT_TIMCTRL3_TEST_SIGNAL 0x000F0000 +#define BF_TIMROT_TIMCTRL3_TEST_SIGNAL(v) \ + (((v) << 16) & BM_TIMROT_TIMCTRL3_TEST_SIGNAL) +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__NEVER_TICK 0x0 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM0 0x1 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM1 0x2 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM2 0x3 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM3 0x4 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__PWM4 0x5 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__ROTARYA 0x6 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__ROTARYB 0x7 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__32KHZ_XTAL 0x8 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__8KHZ_XTAL 0x9 +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__4KHZ_XTAL 0xA +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__1KHZ_XTAL 0xB +#define BV_TIMROT_TIMCTRL3_TEST_SIGNAL__TICK_ALWAYS 0xC +#define BM_TIMROT_TIMCTRL3_IRQ 0x00008000 +#define BM_TIMROT_TIMCTRL3_IRQ_EN 0x00004000 +#define BM_TIMROT_TIMCTRL3_DUTY_VALID 0x00000400 +#define BM_TIMROT_TIMCTRL3_DUTY_CYCLE 0x00000200 +#define BM_TIMROT_TIMCTRL3_POLARITY 0x00000100 +#define BM_TIMROT_TIMCTRL3_UPDATE 0x00000080 +#define BM_TIMROT_TIMCTRL3_RELOAD 0x00000040 +#define BP_TIMROT_TIMCTRL3_PRESCALE 4 +#define BM_TIMROT_TIMCTRL3_PRESCALE 0x00000030 +#define BF_TIMROT_TIMCTRL3_PRESCALE(v) \ + (((v) << 4) & BM_TIMROT_TIMCTRL3_PRESCALE) +#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_1 0x0 +#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_2 0x1 +#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_4 0x2 +#define BV_TIMROT_TIMCTRL3_PRESCALE__DIV_BY_8 0x3 +#define BP_TIMROT_TIMCTRL3_SELECT 0 +#define BM_TIMROT_TIMCTRL3_SELECT 0x0000000F +#define BF_TIMROT_TIMCTRL3_SELECT(v) \ + (((v) << 0) & BM_TIMROT_TIMCTRL3_SELECT) +#define BV_TIMROT_TIMCTRL3_SELECT__NEVER_TICK 0x0 +#define BV_TIMROT_TIMCTRL3_SELECT__PWM0 0x1 +#define BV_TIMROT_TIMCTRL3_SELECT__PWM1 0x2 +#define BV_TIMROT_TIMCTRL3_SELECT__PWM2 0x3 +#define BV_TIMROT_TIMCTRL3_SELECT__PWM3 0x4 +#define BV_TIMROT_TIMCTRL3_SELECT__PWM4 0x5 +#define BV_TIMROT_TIMCTRL3_SELECT__ROTARYA 0x6 +#define BV_TIMROT_TIMCTRL3_SELECT__ROTARYB 0x7 +#define BV_TIMROT_TIMCTRL3_SELECT__32KHZ_XTAL 0x8 +#define BV_TIMROT_TIMCTRL3_SELECT__8KHZ_XTAL 0x9 +#define BV_TIMROT_TIMCTRL3_SELECT__4KHZ_XTAL 0xA +#define BV_TIMROT_TIMCTRL3_SELECT__1KHZ_XTAL 0xB +#define BV_TIMROT_TIMCTRL3_SELECT__TICK_ALWAYS 0xC +HW_REGISTER_0(HW_TIMROT_TIMCOUNT3, REGS_TIMROT_BASE, 0x00000090) +#define HW_TIMROT_TIMCOUNT3_ADDR (REGS_TIMROT_BASE + 0x00000090) +#define BP_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT 16 +#define BM_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT 0xFFFF0000 +#define BF_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT(v) \ + (((v) << 16) & BM_TIMROT_TIMCOUNT3_LOW_RUNNING_COUNT) +#define BP_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT 0 +#define BM_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT 0x0000FFFF +#define BF_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT(v) \ + (((v) << 0) & BM_TIMROT_TIMCOUNT3_HIGH_FIXED_COUNT) +HW_REGISTER_0(HW_TIMROT_VERSION, REGS_TIMROT_BASE, 0x000000a0) +#define HW_TIMROT_VERSION_ADDR (REGS_TIMROT_BASE + 0x000000a0) +#define BP_TIMROT_VERSION_MAJOR 24 +#define BM_TIMROT_VERSION_MAJOR 0xFF000000 +#define BF_TIMROT_VERSION_MAJOR(v) \ + (((v) << 24) & BM_TIMROT_VERSION_MAJOR) +#define BP_TIMROT_VERSION_MINOR 16 +#define BM_TIMROT_VERSION_MINOR 0x00FF0000 +#define BF_TIMROT_VERSION_MINOR(v) \ + (((v) << 16) & BM_TIMROT_VERSION_MINOR) +#define BP_TIMROT_VERSION_STEP 0 +#define BM_TIMROT_VERSION_STEP 0x0000FFFF +#define BF_TIMROT_VERSION_STEP(v) \ + (((v) << 0) & BM_TIMROT_VERSION_STEP) +#endif /* __ARCH_ARM___TIMROT_H */ -- cgit v0.10.2 From e317872ac532fd845c597e55ceb5a9bceee878c1 Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Wed, 22 Apr 2009 23:55:23 +0100 Subject: [ARM] 5466/1: Freescale STMP platform support [5/10] Shared (platform) headers Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/plat-stmp3xxx/include/mach/clkdev.h b/arch/arm/plat-stmp3xxx/include/mach/clkdev.h new file mode 100644 index 0000000..f9c3977 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/clkdev.h @@ -0,0 +1,18 @@ +/* + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_MACH_CLKDEV_H +#define __ASM_MACH_CLKDEV_H + +#define __clk_get(clk) ({ 1; }) +#define __clk_put(clk) do { } while (0) + +#endif diff --git a/arch/arm/plat-stmp3xxx/include/mach/cputype.h b/arch/arm/plat-stmp3xxx/include/mach/cputype.h new file mode 100644 index 0000000..b4e205b --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/cputype.h @@ -0,0 +1,33 @@ +/* + * Freescale STMP37XX/STMP378X CPU type detection + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_CPU_H +#define __ASM_PLAT_CPU_H + +#ifdef CONFIG_ARCH_STMP37XX +#define cpu_is_stmp37xx() (1) +#else +#define cpu_is_stmp37xx() (0) +#endif + +#ifdef CONFIG_ARCH_STMP378X +#define cpu_is_stmp378x() (1) +#else +#define cpu_is_stmp378x() (0) +#endif + +#endif /* __ASM_PLAT_CPU_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/debug-macro.S b/arch/arm/plat-stmp3xxx/include/mach/debug-macro.S new file mode 100644 index 0000000..fb3b969 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/debug-macro.S @@ -0,0 +1,42 @@ +/* + * Debugging macro include header + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + + .macro addruart,rx + mrc p15, 0, \rx, c1, c0 + tst \rx, #1 @ MMU enabled? + moveq \rx, #0x80000000 @ physical base address + addeq \rx, \rx, #0x00070000 + movne \rx, #0xf0000000 @ virtual base + addne \rx, \rx, #0x00070000 + .endm + + .macro senduart,rd,rx + strb \rd, [\rx, #0] @ data register at 0 + .endm + + .macro waituart,rd,rx +1001: ldr \rd, [\rx, #0x18] @ UARTFLG + tst \rd, #1 << 5 @ UARTFLGUTXFF - 1 when full + bne 1001b + .endm + + .macro busyuart,rd,rx +1001: ldr \rd, [\rx, #0x18] @ UARTFLG + tst \rd, #1 << 3 @ UARTFLGUBUSY - 1 when busy + bne 1001b + .endm diff --git a/arch/arm/plat-stmp3xxx/include/mach/dma.h b/arch/arm/plat-stmp3xxx/include/mach/dma.h new file mode 100644 index 0000000..1e305b2 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/dma.h @@ -0,0 +1,155 @@ +/* + * Freescale STMP37XX/STMP378X DMA helper interface + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_STMP3XXX_DMA_H +#define __ASM_PLAT_STMP3XXX_DMA_H + +#include +#include + +#if !defined(MAX_PIO_WORDS) +#define MAX_PIO_WORDS (15) +#endif + +#define STMP3XXX_BUS_APBH 0 +#define STMP3XXX_BUS_APBX 1 +#define STMP3XXX_DMA_MAX_CHANNEL 16 + + +#define STMP3xxx_DMA(channel, bus) ((bus) * 16 + (channel)) + +#define MAX_DMA_ADDRESS 0xffffffff + +#define MAX_DMA_CHANNELS 32 + +struct stmp3xxx_dma_command { + u32 next; + u32 cmd; + union { + u32 buf_ptr; + u32 alternate; + }; + u32 pio_words[MAX_PIO_WORDS]; +}; + +struct stmp3xxx_dma_descriptor { + struct stmp3xxx_dma_command *command; + dma_addr_t handle; + + /* The virtual address of the buffer pointer */ + void *virtual_buf_ptr; + /* The next descriptor in a the DMA chain (optional) */ + struct stmp3xxx_dma_descriptor *next_descr; +}; + +struct stmp37xx_circ_dma_chain { + unsigned total_count; + struct stmp3xxx_dma_descriptor *chain; + + unsigned free_index; + unsigned free_count; + unsigned active_index; + unsigned active_count; + unsigned cooked_index; + unsigned cooked_count; + + int bus; + unsigned channel; +}; + +static inline struct stmp3xxx_dma_descriptor + *stmp3xxx_dma_circ_get_free_head(struct stmp37xx_circ_dma_chain *chain) +{ + return &(chain->chain[chain->free_index]); +} + +static inline struct stmp3xxx_dma_descriptor + *stmp3xxx_dma_circ_get_cooked_head(struct stmp37xx_circ_dma_chain *chain) +{ + return &(chain->chain[chain->cooked_index]); +} + +int stmp3xxx_dma_request(int ch, struct device *dev, const char *name); +int stmp3xxx_dma_release(int ch); +int stmp3xxx_dma_allocate_command(int ch, + struct stmp3xxx_dma_descriptor *descriptor); +int stmp3xxx_dma_free_command(int ch, + struct stmp3xxx_dma_descriptor *descriptor); +void stmp3xxx_dma_continue(int channel, u32 semaphore); +void stmp3xxx_dma_go(int ch, struct stmp3xxx_dma_descriptor *head, + u32 semaphore); +int stmp3xxx_dma_running(int ch); +int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain, + struct stmp3xxx_dma_descriptor descriptors[], + unsigned items); +void stmp3xxx_dma_free_chain(struct stmp37xx_circ_dma_chain *chain); +void stmp37xx_circ_clear_chain(struct stmp37xx_circ_dma_chain *chain); +void stmp37xx_circ_advance_free(struct stmp37xx_circ_dma_chain *chain, + unsigned count); +void stmp37xx_circ_advance_active(struct stmp37xx_circ_dma_chain *chain, + unsigned count); +unsigned stmp37xx_circ_advance_cooked(struct stmp37xx_circ_dma_chain *chain); +int stmp3xxx_dma_read_semaphore(int ch); +void stmp3xxx_dma_init(void); +void stmp3xxx_dma_set_alt_target(int ch, int target); +void stmp3xxx_dma_suspend(void); +void stmp3xxx_dma_resume(void); + +/* + * STMP37xx and STMP378x have different DMA control + * registers layout + */ + +void stmp3xxx_arch_dma_freeze(int ch); +void stmp3xxx_arch_dma_unfreeze(int ch); +void stmp3xxx_arch_dma_reset_channel(int ch); +void stmp3xxx_arch_dma_enable_interrupt(int ch); +void stmp3xxx_arch_dma_clear_interrupt(int ch); +int stmp3xxx_arch_dma_is_interrupt(int ch); + +static inline void stmp3xxx_dma_reset_channel(int ch) +{ + stmp3xxx_arch_dma_reset_channel(ch); +} + + +static inline void stmp3xxx_dma_freeze(int ch) +{ + stmp3xxx_arch_dma_freeze(ch); +} + +static inline void stmp3xxx_dma_unfreeze(int ch) +{ + stmp3xxx_arch_dma_unfreeze(ch); +} + +static inline void stmp3xxx_dma_enable_interrupt(int ch) +{ + stmp3xxx_arch_dma_enable_interrupt(ch); +} + +static inline void stmp3xxx_dma_clear_interrupt(int ch) +{ + stmp3xxx_arch_dma_clear_interrupt(ch); +} + +static inline int stmp3xxx_dma_is_interrupt(int ch) +{ + return stmp3xxx_arch_dma_is_interrupt(ch); +} + +#endif /* __ASM_PLAT_STMP3XXX_DMA_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/gpio.h b/arch/arm/plat-stmp3xxx/include/mach/gpio.h new file mode 100644 index 0000000..a8b5792 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/gpio.h @@ -0,0 +1,28 @@ +/* + * Freescale STMP37XX/STMP378X GPIO interface + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_GPIO_H +#define __ASM_PLAT_GPIO_H + +#define ARCH_NR_GPIOS (32 * 3) +#define gpio_to_irq(gpio) __gpio_to_irq(gpio) +#define gpio_get_value(gpio) __gpio_get_value(gpio) +#define gpio_set_value(gpio, value) __gpio_set_value(gpio, value) + +#include + +#endif /* __ASM_PLAT_GPIO_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/hardware.h b/arch/arm/plat-stmp3xxx/include/mach/hardware.h new file mode 100644 index 0000000..47b8978 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/hardware.h @@ -0,0 +1,32 @@ +/* + * This file contains the hardware definitions of the Freescale STMP3XXX + * + * Copyright (C) 2005 Sigmatel Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_ARCH_HARDWARE_H +#define __ASM_ARCH_HARDWARE_H + +/* + * Where in virtual memory the IO devices (timers, system controllers + * and so on) + */ +#define IO_BASE 0xF0000000 /* VA of IO */ +#define IO_SIZE 0x00100000 /* How much? */ +#define IO_START 0x80000000 /* PA of IO */ + +/* macro to get at IO space when running virtually */ +#define IO_ADDRESS(x) (((x) & 0x000fffff) | IO_BASE) + +#endif diff --git a/arch/arm/plat-stmp3xxx/include/mach/io.h b/arch/arm/plat-stmp3xxx/include/mach/io.h new file mode 100644 index 0000000..d08b1b7 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/io.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2005 Sigmatel Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_ARM_ARCH_IO_H +#define __ASM_ARM_ARCH_IO_H + +#define IO_SPACE_LIMIT 0xffffffff + +#define __io(a) __typesafe_io(a) +#define __mem_pci(a) (a) +#define __mem_isa(a) (a) + +#endif diff --git a/arch/arm/plat-stmp3xxx/include/mach/memory.h b/arch/arm/plat-stmp3xxx/include/mach/memory.h new file mode 100644 index 0000000..7b875a0 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/memory.h @@ -0,0 +1,22 @@ +/* + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_ARCH_MEMORY_H +#define __ASM_ARCH_MEMORY_H + +/* + * Physical DRAM offset. + */ +#define PHYS_OFFSET UL(0x40000000) + +#endif diff --git a/arch/arm/plat-stmp3xxx/include/mach/pinmux.h b/arch/arm/plat-stmp3xxx/include/mach/pinmux.h new file mode 100644 index 0000000..526c068 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/pinmux.h @@ -0,0 +1,158 @@ +/* + * Freescale STMP37XX/STMP378X Pin Multiplexing + * + * Author: Vladislav Buzov + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __PINMUX_H +#define __PINMUX_H + +#include +#include +#include +#include + +/* Pin definitions */ +#include "pins.h" +#include + +/* + * Each pin may be routed up to four different HW interfaces + * including GPIO + */ +enum pin_fun { + PIN_FUN1 = 0, + PIN_FUN2, + PIN_FUN3, + PIN_GPIO, +}; + +/* + * Each pin may have different output drive strength in range from + * 4mA to 20mA. The most common case is 4, 8 and 12 mA strengths. + */ +enum pin_strength { + PIN_4MA = 0, + PIN_8MA, + PIN_12MA, + PIN_16MA, + PIN_20MA, +}; + +/* + * Each pin can be programmed for 1.8V or 3.3V + */ +enum pin_voltage { + PIN_1_8V = 0, + PIN_3_3V, +}; + +/* + * Structure to define a group of pins and their parameters + */ +struct pin_desc { + unsigned id; + enum pin_fun fun; + enum pin_strength strength; + enum pin_voltage voltage; + unsigned pullup:1; +}; + +struct pin_group { + struct pin_desc *pins; + int nr_pins; +}; + +/* Set pin drive strength */ +void stmp3xxx_pin_strength(unsigned id, enum pin_strength strength, + const char *label); + +/* Set pin voltage */ +void stmp3xxx_pin_voltage(unsigned id, enum pin_voltage voltage, + const char *label); + +/* Enable pull-up resistor for a pin */ +void stmp3xxx_pin_pullup(unsigned id, int enable, const char *label); + +/* + * Request a pin ownership, only one module (identified by @label) + * may own a pin. + */ +int stmp3xxx_request_pin(unsigned id, enum pin_fun fun, const char *label); + +/* Release pin */ +void stmp3xxx_release_pin(unsigned id, const char *label); + +void stmp3xxx_set_pin_type(unsigned id, enum pin_fun fun); + +/* + * Each bank is associated with a number of registers to control + * pin function, drive strength, voltage and pull-up reigster. The + * number of registers of a given type depends on the number of bits + * describin particular pin. + */ +#define HW_MUXSEL_NUM 2 /* registers per bank */ +#define HW_MUXSEL_PIN_LEN 2 /* bits per pin */ +#define HW_MUXSEL_PIN_NUM 16 /* pins per register */ +#define HW_MUXSEL_PINFUN_MASK 0x3 /* pin function mask */ +#define HW_MUXSEL_PINFUN_NUM 4 /* four options for a pin */ + +#define HW_DRIVE_NUM 4 /* registers per bank */ +#define HW_DRIVE_PIN_LEN 4 /* bits per pin */ +#define HW_DRIVE_PIN_NUM 8 /* pins per register */ +#define HW_DRIVE_PINDRV_MASK 0x3 /* pin strength mask - 2 bits */ +#define HW_DRIVE_PINDRV_NUM 5 /* five possible strength values */ +#define HW_DRIVE_PINV_MASK 0x4 /* pin voltage mask - 1 bit */ + + +struct stmp3xxx_pinmux_bank { + struct gpio_chip chip; + + /* Pins allocation map */ + unsigned long pin_map; + + /* Pin owner names */ + const char *pin_labels[32]; + + /* Bank registers */ + void __iomem *hw_muxsel[HW_MUXSEL_NUM]; + void __iomem *hw_drive[HW_DRIVE_NUM]; + void __iomem *hw_pull; + + void __iomem *pin2irq, + *irqlevel, + *irqpolarity, + *irqen, + *irqstat; + + /* HW MUXSEL register function bit values */ + u8 functions[HW_MUXSEL_PINFUN_NUM]; + + /* + * HW DRIVE register strength bit values: + * 0xff - requested strength is not supported for this bank + */ + u8 strengths[HW_DRIVE_PINDRV_NUM]; + + /* GPIO things */ + void __iomem *hw_gpio_read, + *hw_gpio_set, + *hw_gpio_clr, + *hw_gpio_doe; + int irq, virq; +}; + +int __init stmp3xxx_pinmux_init(int virtual_irq_start); + +#endif /* __PINMUX_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/pins.h b/arch/arm/plat-stmp3xxx/include/mach/pins.h new file mode 100644 index 0000000..c573318 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/pins.h @@ -0,0 +1,30 @@ +/* + * Freescale STMP37XX/STMP378X Pin multiplexing interface definitions + * + * Author: Vladislav Buzov + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_PINS_H +#define __ASM_PLAT_PINS_H + +#define STMP3XXX_PINID(bank, pin) (bank * 32 + pin) +#define STMP3XXX_PINID_TO_BANK(pinid) (pinid / 32) +#define STMP3XXX_PINID_TO_PINNUM(pinid) (pinid % 32) + +/* + * Special invalid pin identificator to show a pin doesn't exist + */ +#define PINID_NO_PIN STMP3XXX_PINID(0xFF, 0xFF) + +#endif /* __ASM_PLAT_PINS_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/platform.h b/arch/arm/plat-stmp3xxx/include/mach/platform.h new file mode 100644 index 0000000..525c413 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/platform.h @@ -0,0 +1,47 @@ +/* + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_PLATFORM_H +#define __ASM_PLAT_PLATFORM_H + +#include + +/* Virtual address where registers are mapped */ +#define STMP3XXX_REGS_PHBASE 0x80000000 +#ifdef __ASSEMBLER__ +#define STMP3XXX_REGS_BASE 0xF0000000 +#else +#define STMP3XXX_REGS_BASE (void __iomem *)0xF0000000 +#endif +#define STMP3XXX_REGS_SIZE SZ_1M + +/* Virtual address where OCRAM is mapped */ +#define STMP3XXX_OCRAM_PHBASE 0x00000000 +#ifdef __ASSEMBLER__ +#define STMP3XXX_OCRAM_BASE 0xf1000000 +#else +#define STMP3XXX_OCRAM_BASE (void __iomem *)0xf1000000 +#endif +#define STMP3XXX_OCRAM_SIZE (32 * SZ_1K) + +#ifdef CONFIG_ARCH_STMP37XX +#define IRQ_PRIORITY_REG_RD HW_ICOLL_PRIORITYn_RD +#define IRQ_PRIORITY_REG_WR HW_ICOLL_PRIORITYn_WR +#endif + +#ifdef CONFIG_ARCH_STMP378X +#define IRQ_PRIORITY_REG_RD HW_ICOLL_INTERRUPTn_RD +#define IRQ_PRIORITY_REG_WR HW_ICOLL_INTERRUPTn_WR +#endif + +#endif /* __ASM_ARCH_PLATFORM_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h new file mode 100644 index 0000000..78cf1be --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx.h @@ -0,0 +1,34 @@ +/* + * Freescale STMP37XX/STMP378X core structure and function declarations + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_STMP3XXX_H +#define __ASM_PLAT_STMP3XXX_H + +#include + +extern struct sys_timer stmp3xxx_timer; + +void stmp3xxx_init_irq(struct irq_chip *chip); +void stmp3xxx_init(void); +int stmp3xxx_reset_block(void __iomem *hwreg, int just_enable); +extern struct platform_device stmp3xxx_dbguart; + +struct pin_group; +void stmp3xxx_release_pin_group(struct pin_group *pin_group, const char *label); +int stmp3xxx_request_pin_group(struct pin_group *pin_group, const char *label); + +#endif /* __ASM_PLAT_STMP3XXX_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h new file mode 100644 index 0000000..47797b2 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/stmp3xxx_regs.h @@ -0,0 +1,195 @@ +/* + * Freescale STMP37XX/STMP378X SoC register access interfaces + * + * The SoC registers may be accessed via: + * + * - single 32 bit address, or + * - four 32 bit addresses - general purpose, set, clear and toggle bits + * + * Multiple IP blocks (e.g. SSP, UART) provide identical register sets per + * each module + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_STMP3XXX_REGS_H +#define __ASM_PLAT_STMP3XXX_REGS_H + +#ifndef __ASSEMBLER__ +#include +#endif + +#include "platform.h" + +#define REGS_BASE STMP3XXX_REGS_BASE + +#define HW_STMP3xxx_SET 0x04 +#define HW_STMP3xxx_CLR 0x08 +#define HW_STMP3xxx_TOG 0x0c + +#ifndef __ASSEMBLER__ +#define HW_REGISTER_FUNCS(id, base, offset, regset, rd, wr) \ + static const u32 id##_OFFSET = offset; \ + static inline u32 id##_RD_NB(const void __iomem *regbase) { \ + if (!rd) \ + printk(KERN_ERR"%s: cannot READ at %p+%x\n", \ + #id, regbase, offset); \ + return __raw_readl(regbase + offset); \ + } \ + static inline void id##_WR_NB(void __iomem *regbase, u32 v) { \ + if (!wr) \ + printk(KERN_ERR"%s: cannot WRITE at %p+%x\n", \ + #id, regbase, offset); \ + __raw_writel(v, regbase + offset); \ + } \ + static inline void id##_SET_NB(void __iomem *regbase, u32 v) { \ + if (!wr) \ + printk(KERN_ERR"%s: cannot SET at %p+%x\n", \ + #id, regbase, offset); \ + if (regset) \ + __raw_writel(v, regbase + \ + offset + HW_STMP3xxx_SET); \ + else \ + __raw_writel(v | __raw_readl(regbase + offset), \ + regbase + offset); \ + } \ + static inline void id##_CLR_NB(void __iomem *regbase, u32 v) { \ + if (!wr) \ + printk(KERN_ERR"%s: cannot CLR at %p+%x\n", \ + #id, regbase, offset); \ + if (regset) \ + __raw_writel(v, regbase + \ + offset + HW_STMP3xxx_CLR); \ + else \ + __raw_writel( \ + ~v & __raw_readl(regbase + offset), \ + regbase + offset); \ + } \ + static inline void id##_TOG_NB(void __iomem *regbase, u32 v) { \ + if (!wr) \ + printk(KERN_ERR"%s: cannot TOG at %p+%x\n", \ + #id, regbase, offset); \ + if (regset) \ + __raw_writel(v, regbase + \ + offset + HW_STMP3xxx_TOG); \ + else \ + __raw_writel(v ^ __raw_readl(regbase + offset), \ + regbase + offset); \ + } \ + static inline u32 id##_RD(void) { return id##_RD_NB(base); } \ + static inline void id##_WR(u32 v) { id##_WR_NB(base, v); } \ + static inline void id##_SET(u32 v) { id##_SET_NB(base, v); } \ + static inline void id##_CLR(u32 v) { id##_CLR_NB(base, v); } \ + static inline void id##_TOG(u32 v) { id##_TOG_NB(base, v); } + +#define HW_REGISTER_FUNCS_INDEXED(id, base, offset, regset, rd, wr, step)\ + static inline u32 id##_OFFSET(int i) { \ + return offset + i * step; \ + } \ + static inline u32 id##_RD_NB(const void __iomem *regbase, int i) {\ + if (!rd) \ + printk(KERN_ERR"%s(%d): can't READ at %p+%x\n", \ + #id, i, regbase, offset + i * step); \ + return __raw_readl(regbase + offset + i * step); \ + } \ + static inline void id##_WR_NB(void __iomem *regbase, int i, u32 v) {\ + if (!wr) \ + printk(KERN_ERR"%s(%d): can't WRITE at %p+%x\n",\ + #id, i, regbase, offset + i * step); \ + __raw_writel(v, regbase + offset + i * step); \ + } \ + static inline void id##_SET_NB(void __iomem *regbase, int i, u32 v) {\ + if (!wr) \ + printk(KERN_ERR"%s(%d): can't SET at %p+%x\n", \ + #id, i, regbase, offset + i * step); \ + if (regset) \ + __raw_writel(v, regbase + offset + \ + i * step + HW_STMP3xxx_SET); \ + else \ + __raw_writel(v | __raw_readl(regbase + \ + offset + i * step), \ + regbase + offset + i * step); \ + } \ + static inline void id##_CLR_NB(void __iomem *regbase, int i, u32 v) {\ + if (!wr) \ + printk(KERN_ERR"%s(%d): cannot CLR at %p+%x\n", \ + #id, i, regbase, offset + i * step); \ + if (regset) \ + __raw_writel(v, regbase + offset + \ + i * step + HW_STMP3xxx_CLR); \ + else \ + __raw_writel(~v & __raw_readl(regbase + \ + offset + i * step), \ + regbase + offset + i * step); \ + } \ + static inline void id##_TOG_NB(void __iomem *regbase, int i, u32 v) {\ + if (!wr) \ + printk(KERN_ERR"%s(%d): cannot TOG at %p+%x\n", \ + #id, i, regbase, offset + i * step); \ + if (regset) \ + __raw_writel(v, regbase + offset + \ + i * step + HW_STMP3xxx_TOG); \ + else \ + __raw_writel(v ^ __raw_readl(regbase + offset \ + + i * step), \ + regbase + offset + i * step); \ + } \ + static inline u32 id##_RD(int i) \ + { \ + return id##_RD_NB(base, i); \ + } \ + static inline void id##_WR(int i, u32 v) \ + { \ + id##_WR_NB(base, i, v); \ + } \ + static inline void id##_SET(int i, u32 v) \ + { \ + id##_SET_NB(base, i, v); \ + } \ + static inline void id##_CLR(int i, u32 v) \ + { \ + id##_CLR_NB(base, i, v); \ + } \ + static inline void id##_TOG(int i, u32 v) \ + { \ + id##_TOG_NB(base, i, v); \ + } + +#define HW_REGISTER_WO(id, base, offset)\ + HW_REGISTER_FUNCS(id, base, offset, 1, 0, 1) +#define HW_REGISTER_RO(id, base, offset)\ + HW_REGISTER_FUNCS(id, base, offset, 1, 1, 0) +#define HW_REGISTER(id, base, offset) \ + HW_REGISTER_FUNCS(id, base, offset, 1, 1, 1) +#define HW_REGISTER_0(id, base, offset) \ + HW_REGISTER_FUNCS(id, base, offset, 0, 1, 1) +#define HW_REGISTER_INDEXED(id, base, offset, step) \ + HW_REGISTER_FUNCS_INDEXED(id, base, offset, 1, 1, 1, step) +#define HW_REGISTER_RO_INDEXED(id, base, offset, step) \ + HW_REGISTER_FUNCS_INDEXED(id, base, offset, 1, 1, 0, step) +#define HW_REGISTER_0_INDEXED(id, base, offset, step) \ + HW_REGISTER_FUNCS_INDEXED(id, base, offset, 0, 1, 1, step) +#else /* __ASSEMBLER__ */ +#define HW_REGISTER_FUNCS(id, base, offset, regset, rd, wr) +#define HW_REGISTER_FUNCS_INDEXED(id, base, offset, regset, rd, wr, step) +#define HW_REGISTER_WO(id, base, offset) +#define HW_REGISTER_RO(id, base, offset) +#define HW_REGISTER(id, base, offset) +#define HW_REGISTER_0(id, base, offset) +#define HW_REGISTER_INDEXED(id, base, offset, step) +#define HW_REGISTER_RO_INDEXED(id, base, offset, step) +#define HW_REGISTER_0_INDEXED(id, base, offset, step) +#endif /* __ASSEMBLER__ */ + +#endif /* __ASM_PLAT_STMP3XXX_REGS_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/system.h b/arch/arm/plat-stmp3xxx/include/mach/system.h new file mode 100644 index 0000000..dac48d2 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/system.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2005 Sigmatel Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_ARCH_SYSTEM_H +#define __ASM_ARCH_SYSTEM_H + +#include +#include +#include + +static inline void arch_idle(void) +{ + /* + * This should do all the clock switching + * and wait for interrupt tricks + */ + + cpu_do_idle(); +} + +static inline void arch_reset(char mode, const char *cmd) +{ + /* Set BATTCHRG to default value */ + HW_POWER_CHARGE_WR(0x00010000); + + /* Set MINPWR to default value */ + HW_POWER_MINPWR_WR(0); + + /* Reset digital side of chip (but not power or RTC) */ + HW_CLKCTRL_RESET_WR(BM_CLKCTRL_RESET_DIG); + + /* Should not return */ +} + +#endif diff --git a/arch/arm/plat-stmp3xxx/include/mach/timex.h b/arch/arm/plat-stmp3xxx/include/mach/timex.h new file mode 100644 index 0000000..3373985 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/timex.h @@ -0,0 +1,20 @@ +/* + * Copyright (C) 1999 ARM Limited + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +/* + * System time clock is sourced from the 32k clock + */ +#define CLOCK_TICK_RATE (32768) diff --git a/arch/arm/plat-stmp3xxx/include/mach/uncompress.h b/arch/arm/plat-stmp3xxx/include/mach/uncompress.h new file mode 100644 index 0000000..f79f5ee --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/uncompress.h @@ -0,0 +1,53 @@ +/* + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ASM_PLAT_UNCOMPRESS_H +#define __ASM_PLAT_UNCOMPRESS_H + +/* + * Register includes are for when the MMU enabled; we need to define our + * own stuff here for pre-MMU use + */ +#define UARTDBG_BASE 0x80070000 +#define UART(c) (((volatile unsigned *)UARTDBG_BASE)[c]) + +/* + * This does not append a newline + */ +static void putc(char c) +{ + /* Wait for TX fifo empty */ + while ((UART(6) & (1<<7)) == 0) + continue; + + /* Write byte */ + UART(0) = c; + + /* Wait for last bit to exit the UART */ + while (UART(6) & (1<<3)) + continue; +} + +static void flush(void) +{ +} + +/* + * nothing to do + */ +#define arch_decomp_setup() + +#define arch_decomp_wdog() + +#endif /* __ASM_PLAT_UNCOMPRESS_H */ diff --git a/arch/arm/plat-stmp3xxx/include/mach/vmalloc.h b/arch/arm/plat-stmp3xxx/include/mach/vmalloc.h new file mode 100644 index 0000000..541b880 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/include/mach/vmalloc.h @@ -0,0 +1,12 @@ +/* + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#define VMALLOC_END (0xF0000000) -- cgit v0.10.2 From 5cccd37ea15970846a93b4b01fafd6e043bafe8e Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Thu, 23 Apr 2009 12:24:13 +0100 Subject: [ARM] 5477/1: Freescale STMP platform support [6/10] Sources: common STMP3xxx platform support Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/plat-stmp3xxx/Kconfig b/arch/arm/plat-stmp3xxx/Kconfig new file mode 100644 index 0000000..2cf37c3 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/Kconfig @@ -0,0 +1,37 @@ +if ARCH_STMP3XXX + +menu "Freescale STMP3xxx implementations" + +choice + prompt "Select STMP3xxx chip family" + +config ARCH_STMP37XX + bool "Freescale SMTP37xx" + select CPU_ARM926T + ---help--- + STMP37xx refers to 3700 through 3769 chips + +config ARCH_STMP378X + bool "Freescale STMP378x" + select CPU_ARM926T + ---help--- + STMP378x refers to 3780 through 3789 chips + +endchoice + +choice + prompt "Select STMP3xxx board type" + +config MACH_STMP37XX + depends on ARCH_STMP37XX + bool "Freescale STMP37xx development board" + +config MACH_STMP378X + depends on ARCH_STMP378X + bool "Freescale STMP378x development board" + +endchoice + +endmenu + +endif diff --git a/arch/arm/plat-stmp3xxx/Makefile b/arch/arm/plat-stmp3xxx/Makefile new file mode 100644 index 0000000..b634800 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the linux kernel. +# +# Object file lists. +obj-y += core.o timer.o irq.o dma.o clock.o pinmux.o diff --git a/arch/arm/plat-stmp3xxx/clock.c b/arch/arm/plat-stmp3xxx/clock.c new file mode 100644 index 0000000..9a1d46b --- /dev/null +++ b/arch/arm/plat-stmp3xxx/clock.c @@ -0,0 +1,1112 @@ +/* + * Clock manipulation routines for Freescale STMP37XX/STMP378X + * + * Author: Vitaly Wool + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "clock.h" + +static DEFINE_SPINLOCK(clocks_lock); + +static struct clk osc_24M; +static struct clk pll_clk; +static struct clk cpu_clk; +static struct clk hclk; + +static int propagate_rate(struct clk *); + +static inline int clk_is_busy(struct clk *clk) +{ + return __raw_readl(clk->busy_reg) & (1 << clk->busy_bit); +} + +static inline int clk_good(struct clk *clk) +{ + return clk && !IS_ERR(clk) && clk->ops; +} + +static int std_clk_enable(struct clk *clk) +{ + if (clk->enable_reg) { + u32 clk_reg = __raw_readl(clk->enable_reg); + if (clk->enable_negate) + clk_reg &= ~(1 << clk->enable_shift); + else + clk_reg |= (1 << clk->enable_shift); + __raw_writel(clk_reg, clk->enable_reg); + if (clk->enable_wait) + udelay(clk->enable_wait); + return 0; + } else + return -EINVAL; +} + +static int std_clk_disable(struct clk *clk) +{ + if (clk->enable_reg) { + u32 clk_reg = __raw_readl(clk->enable_reg); + if (clk->enable_negate) + clk_reg |= (1 << clk->enable_shift); + else + clk_reg &= ~(1 << clk->enable_shift); + __raw_writel(clk_reg, clk->enable_reg); + return 0; + } else + return -EINVAL; +} + +static int io_set_rate(struct clk *clk, u32 rate) +{ + u32 reg_frac, clkctrl_frac; + int i, ret = 0, mask = 0x1f; + + clkctrl_frac = (clk->parent->rate * 18 + rate - 1) / rate; + + if (clkctrl_frac < 18 || clkctrl_frac > 35) { + ret = -EINVAL; + goto out; + } + + reg_frac = __raw_readl(clk->scale_reg); + reg_frac &= ~(mask << clk->scale_shift); + __raw_writel(reg_frac | (clkctrl_frac << clk->scale_shift), + clk->scale_reg); + if (clk->busy_reg) { + for (i = 10000; i; i--) + if (!clk_is_busy(clk)) + break; + if (!i) + ret = -ETIMEDOUT; + else + ret = 0; + } +out: + return ret; +} + +static long io_get_rate(struct clk *clk) +{ + long rate = clk->parent->rate * 18; + int mask = 0x1f; + + rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & mask; + clk->rate = rate; + + return rate; +} + +static long per_get_rate(struct clk *clk) +{ + long rate = clk->parent->rate; + long div; + const int mask = 0xff; + + if (clk->enable_reg && + !(__raw_readl(clk->enable_reg) & clk->enable_shift)) + clk->rate = 0; + else { + div = (__raw_readl(clk->scale_reg) >> clk->scale_shift) & mask; + if (div) + rate /= div; + clk->rate = rate; + } + + return clk->rate; +} + +static int per_set_rate(struct clk *clk, u32 rate) +{ + int ret = -EINVAL; + int div = (clk->parent->rate + rate - 1) / rate; + u32 reg_frac; + const int mask = 0xff; + int try = 10; + int i = -1; + + if (div == 0 || div > mask) + goto out; + + reg_frac = __raw_readl(clk->scale_reg); + reg_frac &= ~(mask << clk->scale_shift); + + while (try--) { + __raw_writel(reg_frac | (div << clk->scale_shift), + clk->scale_reg); + + if (clk->busy_reg) { + for (i = 10000; i; i--) + if (!clk_is_busy(clk)) + break; + } + if (i) + break; + } + + if (!i) + ret = -ETIMEDOUT; + else + ret = 0; + +out: + if (ret != 0) + printk(KERN_ERR "%s: error %d\n", __func__, ret); + return ret; +} + +static long lcdif_get_rate(struct clk *clk) +{ + long rate = clk->parent->rate; + long div; + const int mask = 0xff; + + div = (__raw_readl(clk->scale_reg) >> clk->scale_shift) & mask; + if (div) { + rate /= div; + div = (HW_CLKCTRL_FRAC_RD() & BM_CLKCTRL_FRAC_PIXFRAC) >> + BP_CLKCTRL_FRAC_PIXFRAC; + rate /= div; + } + clk->rate = rate; + + return rate; +} + +static int lcdif_set_rate(struct clk *clk, u32 rate) +{ + int ret = 0; + /* + * On 3700, we can get most timings exact by modifying ref_pix + * and the divider, but keeping the phase timings at 1 (2 + * phases per cycle). + * + * ref_pix can be between 480e6*18/35=246.9MHz and 480e6*18/18=480MHz, + * which is between 18/(18*480e6)=2.084ns and 35/(18*480e6)=4.050ns. + * + * ns_cycle >= 2*18e3/(18*480) = 25/6 + * ns_cycle <= 2*35e3/(18*480) = 875/108 + * + * Multiply the ns_cycle by 'div' to lengthen it until it fits the + * bounds. This is the divider we'll use after ref_pix. + * + * 6 * ns_cycle >= 25 * div + * 108 * ns_cycle <= 875 * div + */ + u32 ns_cycle = 1000000 / rate; + u32 div, reg_val; + u32 lowest_result = (u32) -1; + u32 lowest_div = 0, lowest_fracdiv = 0; + + for (div = 1; div < 256; ++div) { + u32 fracdiv; + u32 ps_result; + int lower_bound = 6 * ns_cycle >= 25 * div; + int upper_bound = 108 * ns_cycle <= 875 * div; + if (!lower_bound) + break; + if (!upper_bound) + continue; + /* + * Found a matching div. Calculate fractional divider needed, + * rounded up. + */ + fracdiv = ((clk->parent->rate / 1000 * 18 / 2) * + ns_cycle + 1000 * div - 1) / + (1000 * div); + if (fracdiv < 18 || fracdiv > 35) { + ret = -EINVAL; + goto out; + } + /* Calculate the actual cycle time this results in */ + ps_result = 6250 * div * fracdiv / 27; + + /* Use the fastest result that doesn't break ns_cycle */ + if (ps_result <= lowest_result) { + lowest_result = ps_result; + lowest_div = div; + lowest_fracdiv = fracdiv; + } + } + + if (div >= 256 || lowest_result == (u32) -1) { + ret = -EINVAL; + goto out; + } + pr_debug("Programming PFD=%u,DIV=%u ref_pix=%uMHz " + "PIXCLK=%uMHz cycle=%u.%03uns\n", + lowest_fracdiv, lowest_div, + 480*18/lowest_fracdiv, 480*18/lowest_fracdiv/lowest_div, + lowest_result / 1000, lowest_result % 1000); + + /* Program ref_pix phase fractional divider */ + HW_CLKCTRL_FRAC_WR((HW_CLKCTRL_FRAC_RD() & ~BM_CLKCTRL_FRAC_PIXFRAC) | + BF_CLKCTRL_FRAC_PIXFRAC(lowest_fracdiv)); + /* Ungate PFD */ + HW_CLKCTRL_FRAC_CLR(BM_CLKCTRL_FRAC_CLKGATEPIX); + + /* Program pix divider */ + reg_val = __raw_readl(clk->scale_reg); + reg_val &= ~(BM_CLKCTRL_PIX_DIV | BM_CLKCTRL_PIX_CLKGATE); + reg_val |= BF_CLKCTRL_PIX_DIV(lowest_div); + __raw_writel(reg_val, clk->scale_reg); + + /* Wait for divider update */ + if (clk->busy_reg) { + int i; + for (i = 10000; i; i--) + if (!clk_is_busy(clk)) + break; + if (!i) { + ret = -ETIMEDOUT; + goto out; + } + } + + /* Switch to ref_pix source */ + HW_CLKCTRL_CLKSEQ_CLR(BM_CLKCTRL_CLKSEQ_BYPASS_PIX); + +out: + return ret; +} + + +static int cpu_set_rate(struct clk *clk, u32 rate) +{ + if (rate < 24000) + return -EINVAL; + else if (rate == 24000) { + /* switch to the 24M source */ + clk_set_parent(clk, &osc_24M); + } else { + int i; + u32 clkctrl_cpu = 1; + u32 c = clkctrl_cpu; + u32 clkctrl_frac = 1; + u32 val; + for ( ; c < 0x40; c++) { + u32 f = (pll_clk.rate*18/c + rate/2) / rate; + int s1, s2; + + if (f < 18 || f > 35) + continue; + s1 = pll_clk.rate*18/clkctrl_frac/clkctrl_cpu - rate; + s2 = pll_clk.rate*18/c/f - rate; + pr_debug("%s: s1 %d, s2 %d\n", __func__, s1, s2); + if (abs(s1) > abs(s2)) { + clkctrl_cpu = c; + clkctrl_frac = f; + } + if (s2 == 0) + break; + }; + pr_debug("%s: clkctrl_cpu %d, clkctrl_frac %d\n", __func__, + clkctrl_cpu, clkctrl_frac); + if (c == 0x40) { + int d = pll_clk.rate*18/clkctrl_frac/clkctrl_cpu - + rate; + if (abs(d) > 100 || + clkctrl_frac < 18 || clkctrl_frac > 35) + return -EINVAL; + } + + /* 4.6.2 */ + val = __raw_readl(clk->scale_reg); + val &= ~(0x3f << clk->scale_shift); + val |= clkctrl_frac; + clk_set_parent(clk, &osc_24M); + udelay(10); + __raw_writel(val, clk->scale_reg); + /* ungate */ + __raw_writel(1<<7, clk->scale_reg + 8); + /* write clkctrl_cpu */ + clk->saved_div = clkctrl_cpu; + HW_CLKCTRL_CPU_WR((HW_CLKCTRL_CPU_RD() & ~0x3f) | clkctrl_cpu); + for (i = 10000; i; i--) + if (!clk_is_busy(clk)) + break; + if (!i) { + printk(KERN_ERR "couldn't set up CPU divisor\n"); + return -ETIMEDOUT; + } + clk_set_parent(clk, &pll_clk); + clk->saved_div = 0; + udelay(10); + } + return 0; +} + +static long cpu_get_rate(struct clk *clk) +{ + long rate = clk->parent->rate * 18; + + rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & 0x3f; + rate /= HW_CLKCTRL_CPU_RD() & 0x3f; + rate = ((rate + 9) / 10) * 10; + clk->rate = rate; + + return rate; +} + +static long cpu_round_rate(struct clk *clk, u32 rate) +{ + unsigned long r = 0; + + if (rate <= 24000) + r = 24000; + else { + u32 clkctrl_cpu = 1; + u32 clkctrl_frac; + do { + clkctrl_frac = + (pll_clk.rate*18 / clkctrl_cpu + rate/2) / rate; + if (clkctrl_frac > 35) + continue; + if (pll_clk.rate*18 / clkctrl_frac / clkctrl_cpu/10 == + rate / 10) + break; + } while (pll_clk.rate / 2 >= clkctrl_cpu++ * rate); + if (pll_clk.rate / 2 < (clkctrl_cpu - 1) * rate) + clkctrl_cpu--; + pr_debug("%s: clkctrl_cpu %d, clkctrl_frac %d\n", __func__, + clkctrl_cpu, clkctrl_frac); + if (clkctrl_frac < 18) + clkctrl_frac = 18; + if (clkctrl_frac > 35) + clkctrl_frac = 35; + + r = pll_clk.rate * 18; + r /= clkctrl_frac; + r /= clkctrl_cpu; + r = 10 * ((r + 9) / 10); + } + return r; +} + +static long emi_get_rate(struct clk *clk) +{ + long rate = clk->parent->rate * 18; + + rate /= (__raw_readl(clk->scale_reg) >> clk->scale_shift) & 0x3f; + rate /= HW_CLKCTRL_EMI_RD() & 0x3f; + clk->rate = rate; + + return rate; +} + +static int clkseq_set_parent(struct clk *clk, struct clk *parent) +{ + int ret = -EINVAL; + int shift = 8; + + /* bypass? */ + if (parent == &osc_24M) + shift = 4; + + if (clk->bypass_reg) { + u32 hbus_mask = BM_CLKCTRL_HBUS_DIV_FRAC_EN | + BM_CLKCTRL_HBUS_DIV; + + if (clk == &cpu_clk && shift == 4) { + u32 hbus_val = HW_CLKCTRL_HBUS_RD(); + u32 cpu_val = HW_CLKCTRL_CPU_RD(); + hbus_val &= ~hbus_mask; + hbus_val |= 1; + clk->saved_div = cpu_val & BM_CLKCTRL_CPU_DIV_CPU; + cpu_val &= ~BM_CLKCTRL_CPU_DIV_CPU; + cpu_val |= 1; + __raw_writel(1 << clk->bypass_shift, + clk->bypass_reg + shift); + if (machine_is_stmp378x()) { + HW_CLKCTRL_HBUS_WR(hbus_val); + HW_CLKCTRL_CPU_WR(cpu_val); + hclk.rate = 0; + } + } else if (clk == &cpu_clk && shift == 8) { + u32 hbus_val = HW_CLKCTRL_HBUS_RD(); + u32 cpu_val = HW_CLKCTRL_CPU_RD(); + hbus_val &= ~hbus_mask; + hbus_val |= 2; + cpu_val &= ~BM_CLKCTRL_CPU_DIV_CPU; + if (clk->saved_div) + cpu_val |= clk->saved_div; + else + cpu_val |= 2; + if (machine_is_stmp378x()) { + HW_CLKCTRL_HBUS_WR(hbus_val); + HW_CLKCTRL_CPU_WR(cpu_val); + hclk.rate = 0; + } + __raw_writel(1 << clk->bypass_shift, + clk->bypass_reg + shift); + } else + __raw_writel(1 << clk->bypass_shift, + clk->bypass_reg + shift); + + ret = 0; + } + + return ret; +} + +static int hbus_set_rate(struct clk *clk, u32 rate) +{ + u8 div = 0; + int is_frac = 0; + u32 clkctrl_hbus; + struct clk *parent = clk->parent; + + pr_debug("%s: rate %d, parent rate %d\n", __func__, rate, + parent->rate); + + if (rate > parent->rate) + return -EINVAL; + + if (((parent->rate + rate/2) / rate) * rate != parent->rate && + parent->rate / rate < 32) { + pr_debug("%s: switching to fractional mode\n", __func__); + is_frac = 1; + } + + if (is_frac) + div = (32 * rate + parent->rate / 2) / parent->rate; + else + div = (parent->rate + rate - 1) / rate; + pr_debug("%s: div calculated is %d\n", __func__, div); + if (!div || div > 0x1f) + return -EINVAL; + + clk_set_parent(&cpu_clk, &osc_24M); + udelay(10); + clkctrl_hbus = __raw_readl(clk->scale_reg); + clkctrl_hbus &= ~0x3f; + clkctrl_hbus |= div; + clkctrl_hbus |= (is_frac << 5); + + __raw_writel(clkctrl_hbus, clk->scale_reg); + if (clk->busy_reg) { + int i; + for (i = 10000; i; i--) + if (!clk_is_busy(clk)) + break; + if (!i) { + printk(KERN_ERR "couldn't set up CPU divisor\n"); + return -ETIMEDOUT; + } + } + clk_set_parent(&cpu_clk, &pll_clk); + __raw_writel(clkctrl_hbus, clk->scale_reg); + udelay(10); + return 0; +} + +static long hbus_get_rate(struct clk *clk) +{ + long rate = clk->parent->rate; + + if (__raw_readl(clk->scale_reg) & 0x20) { + rate *= __raw_readl(clk->scale_reg) & 0x1f; + rate /= 32; + } else + rate /= __raw_readl(clk->scale_reg) & 0x1f; + clk->rate = rate; + + return rate; +} + +static int xbus_set_rate(struct clk *clk, u32 rate) +{ + u16 div = 0; + u32 clkctrl_xbus; + + pr_debug("%s: rate %d, parent rate %d\n", __func__, rate, + clk->parent->rate); + + div = (clk->parent->rate + rate - 1) / rate; + pr_debug("%s: div calculated is %d\n", __func__, div); + if (!div || div > 0x3ff) + return -EINVAL; + + clkctrl_xbus = __raw_readl(clk->scale_reg); + clkctrl_xbus &= ~0x3ff; + clkctrl_xbus |= div; + __raw_writel(clkctrl_xbus, clk->scale_reg); + if (clk->busy_reg) { + int i; + for (i = 10000; i; i--) + if (!clk_is_busy(clk)) + break; + if (!i) { + printk(KERN_ERR "couldn't set up xbus divisor\n"); + return -ETIMEDOUT; + } + } + return 0; +} + +static long xbus_get_rate(struct clk *clk) +{ + long rate = clk->parent->rate; + + rate /= __raw_readl(clk->scale_reg) & 0x3ff; + clk->rate = rate; + + return rate; +} + + +/* Clock ops */ + +static struct clk_ops std_ops = { + .enable = std_clk_enable, + .disable = std_clk_disable, + .get_rate = per_get_rate, + .set_rate = per_set_rate, + .set_parent = clkseq_set_parent, +}; + +static struct clk_ops min_ops = { + .enable = std_clk_enable, + .disable = std_clk_disable, +}; + +static struct clk_ops cpu_ops = { + .enable = std_clk_enable, + .disable = std_clk_disable, + .get_rate = cpu_get_rate, + .set_rate = cpu_set_rate, + .round_rate = cpu_round_rate, + .set_parent = clkseq_set_parent, +}; + +static struct clk_ops io_ops = { + .enable = std_clk_enable, + .disable = std_clk_disable, + .get_rate = io_get_rate, + .set_rate = io_set_rate, +}; + +static struct clk_ops hbus_ops = { + .get_rate = hbus_get_rate, + .set_rate = hbus_set_rate, +}; + +static struct clk_ops xbus_ops = { + .get_rate = xbus_get_rate, + .set_rate = xbus_set_rate, +}; + +static struct clk_ops lcdif_ops = { + .enable = std_clk_enable, + .disable = std_clk_disable, + .get_rate = lcdif_get_rate, + .set_rate = lcdif_set_rate, + .set_parent = clkseq_set_parent, +}; + +static struct clk_ops emi_ops = { + .get_rate = emi_get_rate, +}; + +/* List of on-chip clocks */ + +static struct clk osc_24M = { + .flags = FIXED_RATE | ENABLED, + .rate = 24000, +}; + +static struct clk pll_clk = { + .parent = &osc_24M, + .enable_reg = HW_CLKCTRL_PLLCTRL0_ADDR, + .enable_shift = 16, + .enable_wait = 10, + .flags = FIXED_RATE | ENABLED, + .rate = 480000, + .ops = &min_ops, +}; + +static struct clk cpu_clk = { + .parent = &pll_clk, + .scale_reg = HW_CLKCTRL_FRAC_ADDR, + .scale_shift = 0, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 7, + .busy_reg = HW_CLKCTRL_CPU_ADDR, + .busy_bit = 28, + .flags = RATE_PROPAGATES | ENABLED, + .ops = &cpu_ops, +}; + +static struct clk io_clk = { + .parent = &pll_clk, + .enable_reg = HW_CLKCTRL_FRAC_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .scale_reg = HW_CLKCTRL_FRAC_ADDR, + .scale_shift = 24, + .flags = RATE_PROPAGATES | ENABLED, + .ops = &io_ops, +}; + +static struct clk hclk = { + .parent = &cpu_clk, + .scale_reg = HW_CLKCTRL_HBUS_ADDR, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 7, + .busy_reg = HW_CLKCTRL_HBUS_ADDR, + .busy_bit = 29, + .flags = RATE_PROPAGATES | ENABLED, + .ops = &hbus_ops, +}; + +static struct clk xclk = { + .parent = &osc_24M, + .scale_reg = HW_CLKCTRL_XBUS_ADDR, + .busy_reg = HW_CLKCTRL_XBUS_ADDR, + .busy_bit = 31, + .flags = RATE_PROPAGATES | ENABLED, + .ops = &xbus_ops, +}; + +static struct clk uart_clk = { + .parent = &xclk, + .enable_reg = HW_CLKCTRL_XTAL_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .flags = ENABLED, + .ops = &min_ops, +}; + +static struct clk audio_clk = { + .parent = &xclk, + .enable_reg = HW_CLKCTRL_XTAL_ADDR, + .enable_shift = 30, + .enable_negate = 1, + .ops = &min_ops, +}; + +static struct clk pwm_clk = { + .parent = &xclk, + .enable_reg = HW_CLKCTRL_XTAL_ADDR, + .enable_shift = 29, + .enable_negate = 1, + .ops = &min_ops, +}; + +static struct clk dri_clk = { + .parent = &xclk, + .enable_reg = HW_CLKCTRL_XTAL_ADDR, + .enable_shift = 28, + .enable_negate = 1, + .ops = &min_ops, +}; + +static struct clk digctl_clk = { + .parent = &xclk, + .enable_reg = HW_CLKCTRL_XTAL_ADDR, + .enable_shift = 27, + .enable_negate = 1, + .ops = &min_ops, +}; + +static struct clk timer_clk = { + .parent = &xclk, + .enable_reg = HW_CLKCTRL_XTAL_ADDR, + .enable_shift = 26, + .enable_negate = 1, + .flags = ENABLED, + .ops = &min_ops, +}; + +static struct clk lcdif_clk = { + .parent = &pll_clk, + .scale_reg = HW_CLKCTRL_PIX_ADDR, + .busy_reg = HW_CLKCTRL_PIX_ADDR, + .busy_bit = 29, + .enable_reg = HW_CLKCTRL_PIX_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 1, + .flags = NEEDS_SET_PARENT, + .ops = &lcdif_ops, +}; + +static struct clk ssp_clk = { + .parent = &io_clk, + .scale_reg = HW_CLKCTRL_SSP_ADDR, + .busy_reg = HW_CLKCTRL_SSP_ADDR, + .busy_bit = 29, + .enable_reg = HW_CLKCTRL_SSP_ADDR, + .enable_shift = 31, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 5, + .enable_negate = 1, + .flags = NEEDS_SET_PARENT, + .ops = &std_ops, +}; + +static struct clk gpmi_clk = { + .parent = &io_clk, + .scale_reg = HW_CLKCTRL_GPMI_ADDR, + .busy_reg = HW_CLKCTRL_GPMI_ADDR, + .busy_bit = 29, + .enable_reg = HW_CLKCTRL_GPMI_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 4, + .flags = NEEDS_SET_PARENT, + .ops = &std_ops, +}; + +static struct clk spdif_clk = { + .parent = &pll_clk, + .enable_reg = HW_CLKCTRL_SPDIF_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .ops = &min_ops, +}; + +static struct clk emi_clk = { + .parent = &pll_clk, + .enable_reg = HW_CLKCTRL_EMI_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .scale_reg = HW_CLKCTRL_FRAC_ADDR, + .scale_shift = 8, + .busy_reg = HW_CLKCTRL_EMI_ADDR, + .busy_bit = 28, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 6, + .flags = ENABLED, + .ops = &emi_ops, +}; + +static struct clk ir_clk = { + .parent = &io_clk, + .enable_reg = HW_CLKCTRL_IR_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 3, + .ops = &min_ops, +}; + +static struct clk saif_clk = { + .parent = &pll_clk, + .scale_reg = HW_CLKCTRL_SAIF_ADDR, + .busy_reg = HW_CLKCTRL_SAIF_ADDR, + .busy_bit = 29, + .enable_reg = HW_CLKCTRL_SAIF_ADDR, + .enable_shift = 31, + .enable_negate = 1, + .bypass_reg = HW_CLKCTRL_CLKSEQ_ADDR, + .bypass_shift = 0, + .ops = &std_ops, +}; + +static struct clk usb_clk = { + .parent = &pll_clk, + .enable_reg = HW_CLKCTRL_PLLCTRL0_ADDR, + .enable_shift = 18, + .enable_negate = 1, + .ops = &min_ops, +}; + +/* list of all the clocks */ +static __initdata struct clk_lookup onchip_clks[] = { + { + .con_id = "osc_24M", + .clk = &osc_24M, + }, { + .con_id = "pll", + .clk = &pll_clk, + }, { + .con_id = "cpu", + .clk = &cpu_clk, + }, { + .con_id = "hclk", + .clk = &hclk, + }, { + .con_id = "xclk", + .clk = &xclk, + }, { + .con_id = "io", + .clk = &io_clk, + }, { + .con_id = "uart", + .clk = &uart_clk, + }, { + .con_id = "audio", + .clk = &audio_clk, + }, { + .con_id = "pwm", + .clk = &pwm_clk, + }, { + .con_id = "dri", + .clk = &dri_clk, + }, { + .con_id = "digctl", + .clk = &digctl_clk, + }, { + .con_id = "timer", + .clk = &timer_clk, + }, { + .con_id = "lcdif", + .clk = &lcdif_clk, + }, { + .con_id = "ssp", + .clk = &ssp_clk, + }, { + .con_id = "gpmi", + .clk = &gpmi_clk, + }, { + .con_id = "spdif", + .clk = &spdif_clk, + }, { + .con_id = "emi", + .clk = &emi_clk, + }, { + .con_id = "ir", + .clk = &ir_clk, + }, { + .con_id = "saif", + .clk = &saif_clk, + }, { + .con_id = "usb", + .clk = &usb_clk, + }, +}; + +static int __init propagate_rate(struct clk *clk) +{ + struct clk_lookup *cl; + + for (cl = onchip_clks; cl < onchip_clks + ARRAY_SIZE(onchip_clks); + cl++) { + if (unlikely(!clk_good(cl->clk))) + continue; + if (cl->clk->parent == clk && cl->clk->ops->get_rate) { + cl->clk->ops->get_rate(cl->clk); + if (cl->clk->flags & RATE_PROPAGATES) + propagate_rate(cl->clk); + } + } + + return 0; +} + +/* Exported API */ +unsigned long clk_get_rate(struct clk *clk) +{ + if (unlikely(!clk_good(clk))) + return 0; + + if (clk->rate != 0) + return clk->rate; + + if (clk->ops->get_rate != NULL) + return clk->ops->get_rate(clk); + + return clk_get_rate(clk->parent); +} +EXPORT_SYMBOL(clk_get_rate); + +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + if (unlikely(!clk_good(clk))) + return 0; + + if (clk->ops->round_rate) + return clk->ops->round_rate(clk, rate); + + return 0; +} +EXPORT_SYMBOL(clk_round_rate); + +static inline int close_enough(long rate1, long rate2) +{ + return rate1 && !((rate2 - rate1) * 1000 / rate1); +} + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + int ret = -EINVAL; + + if (unlikely(!clk_good(clk))) + goto out; + + if (clk->flags & FIXED_RATE || !clk->ops->set_rate) + goto out; + + else if (!close_enough(clk->rate, rate)) { + ret = clk->ops->set_rate(clk, rate); + if (ret < 0) + goto out; + clk->rate = rate; + if (clk->flags & RATE_PROPAGATES) + propagate_rate(clk); + } else + ret = 0; + +out: + return ret; +} +EXPORT_SYMBOL(clk_set_rate); + +int clk_enable(struct clk *clk) +{ + unsigned long clocks_flags; + + if (unlikely(!clk_good(clk))) + return -EINVAL; + + if (clk->parent) + clk_enable(clk->parent); + + spin_lock_irqsave(&clocks_lock, clocks_flags); + + clk->usage++; + if (clk->ops && clk->ops->enable) + clk->ops->enable(clk); + + spin_unlock_irqrestore(&clocks_lock, clocks_flags); + return 0; +} +EXPORT_SYMBOL(clk_enable); + +static void local_clk_disable(struct clk *clk) +{ + if (unlikely(!clk_good(clk))) + return; + + if (clk->usage == 0 && clk->ops->disable) + clk->ops->disable(clk); + + if (clk->parent) + local_clk_disable(clk->parent); +} + +void clk_disable(struct clk *clk) +{ + unsigned long clocks_flags; + + if (unlikely(!clk_good(clk))) + return; + + spin_lock_irqsave(&clocks_lock, clocks_flags); + + if ((--clk->usage) == 0 && clk->ops->disable) + clk->ops->disable(clk); + + spin_unlock_irqrestore(&clocks_lock, clocks_flags); + if (clk->parent) + clk_disable(clk->parent); +} +EXPORT_SYMBOL(clk_disable); + +/* Some additional API */ +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + int ret = -ENODEV; + unsigned long clocks_flags; + + if (unlikely(!clk_good(clk))) + goto out; + + if (!clk->ops->set_parent) + goto out; + + spin_lock_irqsave(&clocks_lock, clocks_flags); + + ret = clk->ops->set_parent(clk, parent); + if (!ret) { + /* disable if usage count is 0 */ + local_clk_disable(parent); + + parent->usage += clk->usage; + clk->parent->usage -= clk->usage; + + /* disable if new usage count is 0 */ + local_clk_disable(clk->parent); + + clk->parent = parent; + } + spin_unlock_irqrestore(&clocks_lock, clocks_flags); + +out: + return ret; +} +EXPORT_SYMBOL(clk_set_parent); + +struct clk *clk_get_parent(struct clk *clk) +{ + if (unlikely(!clk_good(clk))) + return NULL; + return clk->parent; +} +EXPORT_SYMBOL(clk_get_parent); + +static int __init clk_init(void) +{ + struct clk_lookup *cl; + struct clk_ops *ops; + + spin_lock_init(&clocks_lock); + + for (cl = onchip_clks; cl < onchip_clks + ARRAY_SIZE(onchip_clks); + cl++) { + if (cl->clk->flags & ENABLED) + clk_enable(cl->clk); + else + local_clk_disable(cl->clk); + + ops = cl->clk->ops; + + if ((cl->clk->flags & NEEDS_INITIALIZATION) && + ops && ops->set_rate) + ops->set_rate(cl->clk, cl->clk->rate); + + if (cl->clk->flags & FIXED_RATE) { + if (cl->clk->flags & RATE_PROPAGATES) + propagate_rate(cl->clk); + } else { + if (ops && ops->get_rate) + ops->get_rate(cl->clk); + } + + if (cl->clk->flags & NEEDS_SET_PARENT) { + if (ops && ops->set_parent) + ops->set_parent(cl->clk, cl->clk->parent); + } + + clkdev_add(cl); + } + return 0; +} + +arch_initcall(clk_init); diff --git a/arch/arm/plat-stmp3xxx/clock.h b/arch/arm/plat-stmp3xxx/clock.h new file mode 100644 index 0000000..a6611e1 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/clock.h @@ -0,0 +1,61 @@ +/* + * Clock control driver for Freescale STMP37XX/STMP378X - internal header file + * + * Author: Vitaly Wool + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __ARCH_ARM_STMX3XXX_CLOCK_H__ +#define __ARCH_ARM_STMX3XXX_CLOCK_H__ + +#ifndef __ASSEMBLER__ + +struct clk_ops { + int (*enable) (struct clk *); + int (*disable) (struct clk *); + long (*get_rate) (struct clk *); + long (*round_rate) (struct clk *, u32); + int (*set_rate) (struct clk *, u32); + int (*set_parent) (struct clk *, struct clk *); +}; + +struct clk { + struct clk *parent; + u32 rate; + u32 flags; + u8 scale_shift; + u8 enable_shift; + u8 bypass_shift; + u8 busy_bit; + s8 usage; + int enable_wait; + int enable_negate; + u32 saved_div; + void __iomem *enable_reg; + void __iomem *scale_reg; + void __iomem *bypass_reg; + void __iomem *busy_reg; + struct clk_ops *ops; +}; + +#endif /* __ASSEMBLER__ */ + +/* Flags */ +#define RATE_PROPAGATES (1<<0) +#define NEEDS_INITIALIZATION (1<<1) +#define PARENT_SET_RATE (1<<2) +#define FIXED_RATE (1<<3) +#define ENABLED (1<<4) +#define NEEDS_SET_PARENT (1<<5) + +#endif diff --git a/arch/arm/plat-stmp3xxx/core.c b/arch/arm/plat-stmp3xxx/core.c new file mode 100644 index 0000000..6e2fef16 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/core.c @@ -0,0 +1,127 @@ +/* + * Freescale STMP37XX/STMP378X core routines + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include + +#include +#include +#include + +static int __stmp3xxx_reset_block(void __iomem *hwreg, int just_enable) +{ + u32 c; + int timeout; + + /* the process of software reset of IP block is done + in several steps: + + - clear SFTRST and wait for block is enabled; + - clear clock gating (CLKGATE bit); + - set the SFTRST again and wait for block is in reset; + - clear SFTRST and wait for reset completion. + */ + c = __raw_readl(hwreg); + c &= ~(1<<31); /* clear SFTRST */ + __raw_writel(c, hwreg); + for (timeout = 1000000; timeout > 0; timeout--) + /* still in SFTRST state ? */ + if ((__raw_readl(hwreg) & (1<<31)) == 0) + break; + if (timeout <= 0) { + printk(KERN_ERR"%s(%p): timeout when enabling\n", + __func__, hwreg); + return -ETIME; + } + + c = __raw_readl(hwreg); + c &= ~(1<<30); /* clear CLKGATE */ + __raw_writel(c, hwreg); + + if (!just_enable) { + c = __raw_readl(hwreg); + c |= (1<<31); /* now again set SFTRST */ + __raw_writel(c, hwreg); + for (timeout = 1000000; timeout > 0; timeout--) + /* poll until CLKGATE set */ + if (__raw_readl(hwreg) & (1<<30)) + break; + if (timeout <= 0) { + printk(KERN_ERR"%s(%p): timeout when resetting\n", + __func__, hwreg); + return -ETIME; + } + + c = __raw_readl(hwreg); + c &= ~(1<<31); /* clear SFTRST */ + __raw_writel(c, hwreg); + for (timeout = 1000000; timeout > 0; timeout--) + /* still in SFTRST state ? */ + if ((__raw_readl(hwreg) & (1<<31)) == 0) + break; + if (timeout <= 0) { + printk(KERN_ERR"%s(%p): timeout when enabling " + "after reset\n", __func__, hwreg); + return -ETIME; + } + + c = __raw_readl(hwreg); + c &= ~(1<<30); /* clear CLKGATE */ + __raw_writel(c, hwreg); + } + for (timeout = 1000000; timeout > 0; timeout--) + /* still in SFTRST state ? */ + if ((__raw_readl(hwreg) & (1<<30)) == 0) + break; + + if (timeout <= 0) { + printk(KERN_ERR"%s(%p): timeout when unclockgating\n", + __func__, hwreg); + return -ETIME; + } + + return 0; +} + +int stmp3xxx_reset_block(void __iomem *hwreg, int just_enable) +{ + int try = 10; + int r; + + while (try--) { + r = __stmp3xxx_reset_block(hwreg, just_enable); + if (!r) + break; + pr_debug("%s: try %d failed\n", __func__, 10 - try); + } + return r; +} +EXPORT_SYMBOL(stmp3xxx_reset_block); + +struct platform_device stmp3xxx_dbguart = { + .name = "stmp3xxx-dbguart", + .id = -1, +}; + +void __init stmp3xxx_init(void) +{ + /* Turn off auto-slow and other tricks */ + HW_CLKCTRL_HBUS_CLR(0x07f00000U); + + stmp3xxx_dma_init(); +} diff --git a/arch/arm/plat-stmp3xxx/dma.c b/arch/arm/plat-stmp3xxx/dma.c new file mode 100644 index 0000000..cf42de0 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/dma.c @@ -0,0 +1,462 @@ +/* + * DMA helper routines for Freescale STMP37XX/STMP378X + * + * Author: dmitry pervushin + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static const size_t pool_item_size = sizeof(struct stmp3xxx_dma_command); +static const size_t pool_alignment = 8; +static struct stmp3xxx_dma_user { + void *pool; + int inuse; + const char *name; +} channels[MAX_DMA_CHANNELS]; + +static inline int dmach(int ch) +{ + return ch % 16; +} + +static inline int dmabus(int ch) +{ + return ch / 16; +} + +#define IS_VALID_CHANNEL(ch) ((ch) >= 0 && (ch) < MAX_DMA_CHANNELS) +#define IS_USED(ch) (channels[ch].inuse) + +int stmp3xxx_dma_request(int ch, struct device *dev, const char *name) +{ + struct stmp3xxx_dma_user *user; + int err = 0; + + user = channels + ch; + if (!IS_VALID_CHANNEL(ch)) { + err = -ENODEV; + goto out; + } + if (IS_USED(ch)) { + err = -EBUSY; + goto out; + } + /* Create a pool to allocate dma commands from */ + user->pool = dma_pool_create(name, dev, pool_item_size, + pool_alignment, PAGE_SIZE); + if (user->pool == NULL) { + err = -ENOMEM; + goto out; + } + user->name = name; + user->inuse++; +out: + return err; +} +EXPORT_SYMBOL(stmp3xxx_dma_request); + +int stmp3xxx_dma_release(int ch) +{ + struct stmp3xxx_dma_user *user = channels + ch; + int err = 0; + + if (!IS_VALID_CHANNEL(ch)) { + err = -ENODEV; + goto out; + } + if (!IS_USED(ch)) { + err = -EBUSY; + goto out; + } + BUG_ON(user->pool == NULL); + dma_pool_destroy(user->pool); + user->inuse--; +out: + return err; +} +EXPORT_SYMBOL(stmp3xxx_dma_release); + +int stmp3xxx_dma_read_semaphore(int channel) +{ + int sem = -1; + + switch (dmabus(channel)) { + case STMP3XXX_BUS_APBH: + sem = + (HW_APBH_CHn_SEMA_RD(dmach(channel)) & + BM_APBH_CHn_SEMA_PHORE) >> BP_APBH_CHn_SEMA_PHORE; + break; + + case STMP3XXX_BUS_APBX: + sem = + (HW_APBX_CHn_SEMA_RD(dmach(channel)) & + BM_APBX_CHn_SEMA_PHORE) >> BP_APBX_CHn_SEMA_PHORE; + break; + default: + BUG(); + } + return sem; +} +EXPORT_SYMBOL(stmp3xxx_dma_read_semaphore); + +int stmp3xxx_dma_allocate_command(int channel, + struct stmp3xxx_dma_descriptor *descriptor) +{ + struct stmp3xxx_dma_user *user = channels + channel; + int err = 0; + + if (!IS_VALID_CHANNEL(channel)) { + err = -ENODEV; + goto out; + } + if (!IS_USED(channel)) { + err = -EBUSY; + goto out; + } + if (descriptor == NULL) { + err = -EINVAL; + goto out; + } + + /* Allocate memory for a command from the buffer */ + descriptor->command = + dma_pool_alloc(user->pool, GFP_KERNEL, &descriptor->handle); + + /* Check it worked */ + if (!descriptor->command) { + err = -ENOMEM; + goto out; + } + + memset(descriptor->command, 0, pool_item_size); +out: + WARN_ON(err); + return err; +} +EXPORT_SYMBOL(stmp3xxx_dma_allocate_command); + +int stmp3xxx_dma_free_command(int channel, + struct stmp3xxx_dma_descriptor *descriptor) +{ + int err = 0; + + if (!IS_VALID_CHANNEL(channel)) { + err = -ENODEV; + goto out; + } + if (!IS_USED(channel)) { + err = -EBUSY; + goto out; + } + + /* Return the command memory to the pool */ + dma_pool_free(channels[channel].pool, descriptor->command, + descriptor->handle); + + /* Initialise descriptor so we're not tempted to use it */ + descriptor->command = NULL; + descriptor->handle = 0; + descriptor->virtual_buf_ptr = NULL; + descriptor->next_descr = NULL; + + WARN_ON(err); +out: + return err; +} +EXPORT_SYMBOL(stmp3xxx_dma_free_command); + +void stmp3xxx_dma_go(int channel, + struct stmp3xxx_dma_descriptor *head, u32 semaphore) +{ + int ch = dmach(channel); + + switch (dmabus(channel)) { + case STMP3XXX_BUS_APBH: + /* Set next command */ + HW_APBH_CHn_NXTCMDAR_WR(ch, head->handle); + /* Set counting semaphore (kicks off transfer). Assumes + peripheral has been set up correctly */ + HW_APBH_CHn_SEMA_WR(ch, semaphore); + break; + + case STMP3XXX_BUS_APBX: + /* Set next command */ + HW_APBX_CHn_NXTCMDAR_WR(ch, head->handle); + /* Set counting semaphore (kicks off transfer). Assumes + peripheral has been set up correctly */ + HW_APBX_CHn_SEMA_WR(ch, semaphore); + break; + } +} +EXPORT_SYMBOL(stmp3xxx_dma_go); + +int stmp3xxx_dma_running(int channel) +{ + switch (dmabus(channel)) { + case STMP3XXX_BUS_APBH: + return HW_APBH_CHn_SEMA_RD(dmach(channel)) & + BM_APBH_CHn_SEMA_PHORE; + + case STMP3XXX_BUS_APBX: + return HW_APBX_CHn_SEMA_RD(dmach(channel)) & + BM_APBX_CHn_SEMA_PHORE; + + default: + BUG(); + return 0; + } +} +EXPORT_SYMBOL(stmp3xxx_dma_running); + +/* + * Circular dma chain management + */ +void stmp3xxx_dma_free_chain(struct stmp37xx_circ_dma_chain *chain) +{ + int i; + + for (i = 0; i < chain->total_count; i++) + stmp3xxx_dma_free_command( + STMP3xxx_DMA(chain->channel, chain->bus), + &chain->chain[i]); +} +EXPORT_SYMBOL(stmp3xxx_dma_free_chain); + +int stmp3xxx_dma_make_chain(int ch, struct stmp37xx_circ_dma_chain *chain, + struct stmp3xxx_dma_descriptor descriptors[], + unsigned items) +{ + int i; + int err = 0; + + if (items == 0) + return err; + + for (i = 0; i < items; i++) { + err = stmp3xxx_dma_allocate_command(ch, &descriptors[i]); + if (err) { + WARN_ON(err); + /* + * Couldn't allocate the whole chain. + * deallocate what has been allocated + */ + if (i) { + do { + stmp3xxx_dma_free_command(ch, + &descriptors + [i]); + } while (i-- >= 0); + } + return err; + } + + /* link them! */ + if (i > 0) { + descriptors[i - 1].next_descr = &descriptors[i]; + descriptors[i - 1].command->next = + descriptors[i].handle; + } + } + + /* make list circular */ + descriptors[items - 1].next_descr = &descriptors[0]; + descriptors[items - 1].command->next = descriptors[0].handle; + + chain->total_count = items; + chain->chain = descriptors; + chain->free_index = 0; + chain->active_index = 0; + chain->cooked_index = 0; + chain->free_count = items; + chain->active_count = 0; + chain->cooked_count = 0; + chain->bus = dmabus(ch); + chain->channel = dmach(ch); + return err; +} +EXPORT_SYMBOL(stmp3xxx_dma_make_chain); + +void stmp37xx_circ_clear_chain(struct stmp37xx_circ_dma_chain *chain) +{ + BUG_ON(stmp3xxx_dma_running(STMP3xxx_DMA(chain->channel, chain->bus)) > + 0); + chain->free_index = 0; + chain->active_index = 0; + chain->cooked_index = 0; + chain->free_count = chain->total_count; + chain->active_count = 0; + chain->cooked_count = 0; +} +EXPORT_SYMBOL(stmp37xx_circ_clear_chain); + +void stmp37xx_circ_advance_free(struct stmp37xx_circ_dma_chain *chain, + unsigned count) +{ + BUG_ON(chain->cooked_count < count); + + chain->cooked_count -= count; + chain->cooked_index += count; + chain->cooked_index %= chain->total_count; + chain->free_count += count; +} +EXPORT_SYMBOL(stmp37xx_circ_advance_free); + +void stmp37xx_circ_advance_active(struct stmp37xx_circ_dma_chain *chain, + unsigned count) +{ + BUG_ON(chain->free_count < count); + + chain->free_count -= count; + chain->free_index += count; + chain->free_index %= chain->total_count; + chain->active_count += count; + + switch (chain->bus) { + case STMP3XXX_BUS_APBH: + /* Set counting semaphore (kicks off transfer). Assumes + peripheral has been set up correctly */ + HW_APBH_CHn_SEMA_CLR(chain->channel, + BM_APBH_CHn_SEMA_INCREMENT_SEMA); + HW_APBH_CHn_SEMA_SET(chain->channel, + BF_APBH_CHn_SEMA_INCREMENT_SEMA(count)); + break; + + case STMP3XXX_BUS_APBX: + /* Set counting semaphore (kicks off transfer). Assumes + peripheral has been set up correctly */ + HW_APBX_CHn_SEMA_CLR(chain->channel, + BM_APBX_CHn_SEMA_INCREMENT_SEMA); + HW_APBX_CHn_SEMA_SET(chain->channel, + BF_APBX_CHn_SEMA_INCREMENT_SEMA(count)); + break; + + default: + BUG(); + } +} +EXPORT_SYMBOL(stmp37xx_circ_advance_active); + +unsigned stmp37xx_circ_advance_cooked(struct stmp37xx_circ_dma_chain *chain) +{ + unsigned cooked; + + cooked = chain->active_count - + stmp3xxx_dma_read_semaphore(STMP3xxx_DMA(chain->channel, chain->bus)); + + chain->active_count -= cooked; + chain->active_index += cooked; + chain->active_index %= chain->total_count; + + chain->cooked_count += cooked; + + return cooked; +} +EXPORT_SYMBOL(stmp37xx_circ_advance_cooked); + +void stmp3xxx_dma_set_alt_target(int channel, int function) +{ +#if defined(CONFIG_ARCH_STMP37XX) + unsigned bits = 4; +#elif defined(CONFIG_ARCH_STMP378X) + unsigned bits = 2; +#else +#error wrong arch +#endif + int shift = dmach(channel) * bits; + unsigned mask = (1<= (1< + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include +#include + +#include +#include + +void __init stmp3xxx_init_irq(struct irq_chip *chip) +{ + unsigned int i; + + /* Reset the interrupt controller */ + HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_CLKGATE); + udelay(10); + HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_SFTRST); + udelay(10); + HW_ICOLL_CTRL_SET(BM_ICOLL_CTRL_SFTRST); + while (!(HW_ICOLL_CTRL_RD() & BM_ICOLL_CTRL_CLKGATE)) + continue; + HW_ICOLL_CTRL_CLR(BM_ICOLL_CTRL_SFTRST | BM_ICOLL_CTRL_CLKGATE); + + /* Disable all interrupts initially */ + for (i = 0; i < NR_REAL_IRQS; i++) { + chip->mask(i); + set_irq_chip(i, chip); + set_irq_handler(i, handle_level_irq); + set_irq_flags(i, IRQF_VALID | IRQF_PROBE); + } + + /* Ensure vector is cleared */ + HW_ICOLL_LEVELACK_WR(1); + HW_ICOLL_LEVELACK_WR(2); + HW_ICOLL_LEVELACK_WR(4); + HW_ICOLL_LEVELACK_WR(8); + + HW_ICOLL_VECTOR_WR(0); + /* Barrier */ + (void) HW_ICOLL_STAT_RD(); +} + diff --git a/arch/arm/plat-stmp3xxx/pinmux.c b/arch/arm/plat-stmp3xxx/pinmux.c new file mode 100644 index 0000000..9b28cc8 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/pinmux.c @@ -0,0 +1,545 @@ +/* + * Freescale STMP378X/STMP378X Pin Multiplexing + * + * Author: Vladislav Buzov + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define NR_BANKS ARRAY_SIZE(pinmux_banks) +static struct stmp3xxx_pinmux_bank pinmux_banks[] = { + [0] = { + .hw_muxsel = { + HW_PINCTRL_MUXSEL0_ADDR, + HW_PINCTRL_MUXSEL1_ADDR + }, + .hw_drive = { + HW_PINCTRL_DRIVE0_ADDR, + HW_PINCTRL_DRIVE1_ADDR, + HW_PINCTRL_DRIVE2_ADDR, + HW_PINCTRL_DRIVE3_ADDR + }, + .hw_pull = HW_PINCTRL_PULL0_ADDR, + .functions = { 0x0, 0x1, 0x2, 0x3 }, + .strengths = { 0x0, 0x1, 0x2, 0x3, 0xff }, + + .hw_gpio_read = HW_PINCTRL_DIN0_ADDR, + .hw_gpio_set = HW_PINCTRL_DOUT0_ADDR + HW_STMP3xxx_SET, + .hw_gpio_clr = HW_PINCTRL_DOUT0_ADDR + HW_STMP3xxx_CLR, + .hw_gpio_doe = HW_PINCTRL_DOE0_ADDR, + .irq = IRQ_GPIO0, + + .pin2irq = HW_PINCTRL_PIN2IRQ0_ADDR, + .irqstat = HW_PINCTRL_IRQSTAT0_ADDR, + .irqlevel = HW_PINCTRL_IRQLEVEL0_ADDR, + .irqpolarity = HW_PINCTRL_IRQPOL0_ADDR, + .irqen = HW_PINCTRL_IRQEN0_ADDR, + }, + [1] = { + .hw_muxsel = { + HW_PINCTRL_MUXSEL2_ADDR, + HW_PINCTRL_MUXSEL3_ADDR + }, + .hw_drive = { + HW_PINCTRL_DRIVE4_ADDR, + HW_PINCTRL_DRIVE5_ADDR, + HW_PINCTRL_DRIVE6_ADDR, + HW_PINCTRL_DRIVE7_ADDR + }, + .hw_pull = HW_PINCTRL_PULL1_ADDR, + .functions = { 0x0, 0x1, 0x2, 0x3 }, + .strengths = { 0x0, 0x1, 0x2, 0x3, 0xff }, + + .hw_gpio_read = HW_PINCTRL_DIN1_ADDR, + .hw_gpio_set = HW_PINCTRL_DOUT1_ADDR + HW_STMP3xxx_SET, + .hw_gpio_clr = HW_PINCTRL_DOUT1_ADDR + HW_STMP3xxx_CLR, + .hw_gpio_doe = HW_PINCTRL_DOE1_ADDR, + .irq = IRQ_GPIO1, + + .pin2irq = HW_PINCTRL_PIN2IRQ1_ADDR, + .irqstat = HW_PINCTRL_IRQSTAT1_ADDR, + .irqlevel = HW_PINCTRL_IRQLEVEL1_ADDR, + .irqpolarity = HW_PINCTRL_IRQPOL1_ADDR, + .irqen = HW_PINCTRL_IRQEN1_ADDR, + }, + [2] = { + .hw_muxsel = { + HW_PINCTRL_MUXSEL4_ADDR, + HW_PINCTRL_MUXSEL5_ADDR, + }, + .hw_drive = { + HW_PINCTRL_DRIVE8_ADDR, + HW_PINCTRL_DRIVE9_ADDR, + HW_PINCTRL_DRIVE10_ADDR, + HW_PINCTRL_DRIVE11_ADDR, + }, + .hw_pull = HW_PINCTRL_PULL2_ADDR, + .functions = { 0x0, 0x1, 0x2, 0x3 }, + .strengths = { 0x0, 0x1, 0x2, 0x1, 0x2 }, + + .hw_gpio_read = HW_PINCTRL_DIN2_ADDR, + .hw_gpio_set = HW_PINCTRL_DOUT2_ADDR + HW_STMP3xxx_SET, + .hw_gpio_clr = HW_PINCTRL_DOUT2_ADDR + HW_STMP3xxx_CLR, + .hw_gpio_doe = HW_PINCTRL_DOE2_ADDR, + .irq = IRQ_GPIO2, + + .pin2irq = HW_PINCTRL_PIN2IRQ2_ADDR, + .irqstat = HW_PINCTRL_IRQSTAT2_ADDR, + .irqlevel = HW_PINCTRL_IRQLEVEL2_ADDR, + .irqpolarity = HW_PINCTRL_IRQPOL2_ADDR, + .irqen = HW_PINCTRL_IRQEN2_ADDR, + }, + [3] = { + .hw_muxsel = { + HW_PINCTRL_MUXSEL6_ADDR, + HW_PINCTRL_MUXSEL7_ADDR, + }, + .hw_drive = { + HW_PINCTRL_DRIVE12_ADDR, + HW_PINCTRL_DRIVE13_ADDR, + HW_PINCTRL_DRIVE14_ADDR, + NULL, + }, + .hw_pull = HW_PINCTRL_PULL3_ADDR, + .functions = {0x0, 0x1, 0x2, 0x3}, + .strengths = {0x0, 0x1, 0x2, 0x3, 0xff}, + }, +}; + +static inline struct stmp3xxx_pinmux_bank * +stmp3xxx_pinmux_bank(unsigned id, unsigned *bank, unsigned *pin) +{ + unsigned b, p; + + b = STMP3XXX_PINID_TO_BANK(id); + p = STMP3XXX_PINID_TO_PINNUM(id); + BUG_ON(b >= NR_BANKS); + if (bank) + *bank = b; + if (pin) + *pin = p; + return &pinmux_banks[b]; +} + +/* Check if requested pin is owned by caller */ +static int stmp3xxx_check_pin(unsigned id, const char *label) +{ + unsigned pin; + struct stmp3xxx_pinmux_bank *pm = stmp3xxx_pinmux_bank(id, NULL, &pin); + + if (!test_bit(pin, &pm->pin_map)) { + printk(KERN_WARNING + "%s: Accessing free pin %x, caller %s\n", + __func__, id, label); + + return -EINVAL; + } + + if (label && pm->pin_labels[pin] && + strcmp(label, pm->pin_labels[pin])) { + printk(KERN_WARNING + "%s: Wrong pin owner %x, caller %s owner %s\n", + __func__, id, label, pm->pin_labels[pin]); + + return -EINVAL; + } + return 0; +} + +void stmp3xxx_pin_strength(unsigned id, enum pin_strength strength, + const char *label) +{ + struct stmp3xxx_pinmux_bank *pbank; + void __iomem *hwdrive; + u32 shift, val; + u32 bank, pin; + + pbank = stmp3xxx_pinmux_bank(id, &bank, &pin); + pr_debug("%s: label %s bank %d pin %d strength %d\n", __func__, label, + bank, pin, strength); + + hwdrive = pbank->hw_drive[pin / HW_DRIVE_PIN_NUM]; + shift = (pin % HW_DRIVE_PIN_NUM) * HW_DRIVE_PIN_LEN; + val = pbank->strengths[strength]; + if (val == 0xff) { + printk(KERN_WARNING + "%s: strength is not supported for bank %d, caller %s", + __func__, bank, label); + return; + } + + if (stmp3xxx_check_pin(id, label)) + return; + + pr_debug("%s: writing 0x%x to 0x%p register\n", __func__, + val << shift, hwdrive); + __raw_writel(HW_DRIVE_PINDRV_MASK << shift, hwdrive + HW_STMP3xxx_CLR); + __raw_writel(val << shift, hwdrive + HW_STMP3xxx_SET); +} + +void stmp3xxx_pin_voltage(unsigned id, enum pin_voltage voltage, + const char *label) +{ + struct stmp3xxx_pinmux_bank *pbank; + void __iomem *hwdrive; + u32 shift; + u32 bank, pin; + + pbank = stmp3xxx_pinmux_bank(id, &bank, &pin); + pr_debug("%s: label %s bank %d pin %d voltage %d\n", __func__, label, + bank, pin, voltage); + + hwdrive = pbank->hw_drive[pin / HW_DRIVE_PIN_NUM]; + shift = (pin % HW_DRIVE_PIN_NUM) * HW_DRIVE_PIN_LEN; + + if (stmp3xxx_check_pin(id, label)) + return; + + pr_debug("%s: changing 0x%x bit in 0x%p register\n", + __func__, HW_DRIVE_PINV_MASK << shift, hwdrive); + if (voltage == PIN_1_8V) + __raw_writel(HW_DRIVE_PINV_MASK << shift, + hwdrive + HW_STMP3xxx_CLR); + else + __raw_writel(HW_DRIVE_PINV_MASK << shift, + hwdrive + HW_STMP3xxx_SET); +} + +void stmp3xxx_pin_pullup(unsigned id, int enable, const char *label) +{ + struct stmp3xxx_pinmux_bank *pbank; + void __iomem *hwpull; + u32 bank, pin; + + pbank = stmp3xxx_pinmux_bank(id, &bank, &pin); + pr_debug("%s: label %s bank %d pin %d enable %d\n", __func__, label, + bank, pin, enable); + + hwpull = pbank->hw_pull; + + if (stmp3xxx_check_pin(id, label)) + return; + + pr_debug("%s: changing 0x%x bit in 0x%p register\n", + __func__, 1 << pin, hwpull); + __raw_writel(1 << pin, + hwpull + (enable ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR)); +} + +int stmp3xxx_request_pin(unsigned id, enum pin_fun fun, const char *label) +{ + struct stmp3xxx_pinmux_bank *pbank; + u32 bank, pin; + int ret = 0; + + pbank = stmp3xxx_pinmux_bank(id, &bank, &pin); + pr_debug("%s: label %s bank %d pin %d fun %d\n", __func__, label, + bank, pin, fun); + + if (test_bit(pin, &pbank->pin_map)) { + printk(KERN_WARNING + "%s: CONFLICT DETECTED pin %d:%d caller %s owner %s\n", + __func__, bank, pin, label, pbank->pin_labels[pin]); + return -EBUSY; + } + + set_bit(pin, &pbank->pin_map); + pbank->pin_labels[pin] = label; + + stmp3xxx_set_pin_type(id, fun); + + return ret; +} + +void stmp3xxx_set_pin_type(unsigned id, enum pin_fun fun) +{ + struct stmp3xxx_pinmux_bank *pbank; + void __iomem *hwmux; + u32 shift, val; + u32 bank, pin; + + pbank = stmp3xxx_pinmux_bank(id, &bank, &pin); + + hwmux = pbank->hw_muxsel[pin / HW_MUXSEL_PIN_NUM]; + shift = (pin % HW_MUXSEL_PIN_NUM) * HW_MUXSEL_PIN_LEN; + + val = pbank->functions[fun]; + shift = (pin % HW_MUXSEL_PIN_NUM) * HW_MUXSEL_PIN_LEN; + pr_debug("%s: writing 0x%x to 0x%p register\n", + __func__, val << shift, hwmux); + __raw_writel(HW_MUXSEL_PINFUN_MASK << shift, hwmux + HW_STMP3xxx_CLR); + __raw_writel(val << shift, hwmux + HW_STMP3xxx_SET); +} + +void stmp3xxx_release_pin(unsigned id, const char *label) +{ + struct stmp3xxx_pinmux_bank *pbank; + u32 bank, pin; + + pbank = stmp3xxx_pinmux_bank(id, &bank, &pin); + pr_debug("%s: label %s bank %d pin %d\n", __func__, label, bank, pin); + + if (stmp3xxx_check_pin(id, label)) + return; + + clear_bit(pin, &pbank->pin_map); + pbank->pin_labels[pin] = NULL; +} + +int stmp3xxx_request_pin_group(struct pin_group *pin_group, const char *label) +{ + struct pin_desc *pin; + int p; + int err = 0; + + /* Allocate and configure pins */ + for (p = 0; p < pin_group->nr_pins; p++) { + pr_debug("%s: #%d\n", __func__, p); + pin = &pin_group->pins[p]; + + err = stmp3xxx_request_pin(pin->id, pin->fun, label); + if (err) + goto out_err; + + stmp3xxx_pin_strength(pin->id, pin->strength, label); + stmp3xxx_pin_voltage(pin->id, pin->voltage, label); + stmp3xxx_pin_pullup(pin->id, pin->pullup, label); + } + + return 0; + +out_err: + /* Release allocated pins in case of error */ + while (--p >= 0) { + pr_debug("%s: releasing #%d\n", __func__, p); + stmp3xxx_release_pin(pin_group->pins[p].id, label); + } + return err; +} +EXPORT_SYMBOL(stmp3xxx_request_pin_group); + +void stmp3xxx_release_pin_group(struct pin_group *pin_group, const char *label) +{ + struct pin_desc *pin; + int p; + + for (p = 0; p < pin_group->nr_pins; p++) { + pin = &pin_group->pins[p]; + stmp3xxx_release_pin(pin->id, label); + } +} +EXPORT_SYMBOL(stmp3xxx_release_pin_group); + +static int stmp3xxx_irq_to_gpio(int irq, + struct stmp3xxx_pinmux_bank **bank, unsigned *gpio) +{ + struct stmp3xxx_pinmux_bank *pm; + + for (pm = pinmux_banks; pm < pinmux_banks + NR_BANKS; pm++) + if (pm->virq <= irq && irq < pm->virq + 32) { + *bank = pm; + *gpio = irq - pm->virq; + return 0; + } + return -ENOENT; +} + +static int stmp3xxx_set_irqtype(unsigned irq, unsigned type) +{ + struct stmp3xxx_pinmux_bank *pm; + unsigned gpio; + int l, p; + + stmp3xxx_irq_to_gpio(irq, &pm, &gpio); + switch (type) { + case IRQ_TYPE_EDGE_RISING: + l = 0; p = 1; break; + case IRQ_TYPE_EDGE_FALLING: + l = 0; p = 0; break; + case IRQ_TYPE_LEVEL_HIGH: + l = 1; p = 1; break; + case IRQ_TYPE_LEVEL_LOW: + l = 1; p = 0; break; + default: + pr_debug("%s: Incorrect GPIO interrupt type 0x%x\n", + __func__, type); + return -ENXIO; + } + __raw_writel(1 << gpio, + pm->irqlevel + (l ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR)); + __raw_writel(1 << gpio, + pm->irqpolarity + (p ? HW_STMP3xxx_SET : HW_STMP3xxx_CLR)); + return 0; +} + +static void stmp3xxx_pin_ack_irq(unsigned irq) +{ + u32 stat; + struct stmp3xxx_pinmux_bank *pm; + unsigned gpio; + + stmp3xxx_irq_to_gpio(irq, &pm, &gpio); + stat = __raw_readl(pm->irqstat) & (1<irqstat + HW_STMP3xxx_CLR); +} + +static void stmp3xxx_pin_mask_irq(unsigned irq) +{ + struct stmp3xxx_pinmux_bank *pm; + unsigned gpio; + + stmp3xxx_irq_to_gpio(irq, &pm, &gpio); + __raw_writel(1 << gpio, pm->irqen + HW_STMP3xxx_CLR); + __raw_writel(1 << gpio, pm->pin2irq + HW_STMP3xxx_CLR); +} + +static void stmp3xxx_pin_unmask_irq(unsigned irq) +{ + struct stmp3xxx_pinmux_bank *pm; + unsigned gpio; + + stmp3xxx_irq_to_gpio(irq, &pm, &gpio); + __raw_writel(1 << gpio, pm->irqen + HW_STMP3xxx_SET); + __raw_writel(1 << gpio, pm->pin2irq + HW_STMP3xxx_SET); +} + +static inline +struct stmp3xxx_pinmux_bank *to_pinmux_bank(struct gpio_chip *chip) +{ + return container_of(chip, struct stmp3xxx_pinmux_bank, chip); +} + +static int stmp3xxx_gpio_to_irq(struct gpio_chip *chip, unsigned offset) +{ + struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip); + return pm->virq + offset; +} + +static int stmp3xxx_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip); + unsigned v; + + v = __raw_readl(pm->hw_gpio_read) & (1 << offset); + return v ? 1 : 0; +} + +static void stmp3xxx_gpio_set(struct gpio_chip *chip, unsigned offset, int v) +{ + struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip); + + __raw_writel(1 << offset, v ? pm->hw_gpio_set : pm->hw_gpio_clr); +} + +static int stmp3xxx_gpio_output(struct gpio_chip *chip, unsigned offset, int v) +{ + struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip); + + __raw_writel(1 << offset, pm->hw_gpio_doe + HW_STMP3xxx_SET); + stmp3xxx_gpio_set(chip, offset, v); + return 0; +} + +static int stmp3xxx_gpio_input(struct gpio_chip *chip, unsigned offset) +{ + struct stmp3xxx_pinmux_bank *pm = to_pinmux_bank(chip); + + __raw_writel(1 << offset, pm->hw_gpio_doe + HW_STMP3xxx_CLR); + return 0; +} + +static int stmp3xxx_gpio_request(struct gpio_chip *chip, unsigned offset) +{ + return stmp3xxx_request_pin(chip->base + offset, PIN_GPIO, "gpio"); +} + +static void stmp3xxx_gpio_free(struct gpio_chip *chip, unsigned offset) +{ + stmp3xxx_release_pin(chip->base + offset, "gpio"); +} + +static void stmp3xxx_gpio_irq(u32 irq, struct irq_desc *desc) +{ + struct stmp3xxx_pinmux_bank *pm = get_irq_data(irq); + int gpio_irq = pm->virq; + u32 stat = __raw_readl(pm->irqstat); + + while (stat) { + if (stat & 1) + irq_desc[gpio_irq].handle_irq(gpio_irq, + &irq_desc[gpio_irq]); + gpio_irq++; + stat >>= 1; + } +} + +static struct irq_chip gpio_irq_chip = { + .ack = stmp3xxx_pin_ack_irq, + .mask = stmp3xxx_pin_mask_irq, + .unmask = stmp3xxx_pin_unmask_irq, + .set_type = stmp3xxx_set_irqtype, +}; + +int __init stmp3xxx_pinmux_init(int virtual_irq_start) +{ + int b, r = 0; + struct stmp3xxx_pinmux_bank *pm; + int virq; + + for (b = 0; b < 3; b++) { + /* only banks 0,1,2 are allowed to GPIO */ + pm = pinmux_banks + b; + pm->chip.base = 32 * b; + pm->chip.ngpio = 32; + pm->chip.owner = THIS_MODULE; + pm->chip.can_sleep = 1; + pm->chip.exported = 1; + pm->chip.to_irq = stmp3xxx_gpio_to_irq; + pm->chip.direction_input = stmp3xxx_gpio_input; + pm->chip.direction_output = stmp3xxx_gpio_output; + pm->chip.get = stmp3xxx_gpio_get; + pm->chip.set = stmp3xxx_gpio_set; + pm->chip.request = stmp3xxx_gpio_request; + pm->chip.free = stmp3xxx_gpio_free; + pm->virq = virtual_irq_start + b * 32; + + for (virq = pm->virq; virq < pm->virq; virq++) { + gpio_irq_chip.mask(virq); + set_irq_chip(virq, &gpio_irq_chip); + set_irq_handler(virq, handle_level_irq); + set_irq_flags(virq, IRQF_VALID); + } + r = gpiochip_add(&pm->chip); + if (r < 0) + break; + set_irq_chained_handler(pm->irq, stmp3xxx_gpio_irq); + set_irq_data(pm->irq, pm); + } + return r; +} + +MODULE_AUTHOR("Vladislav Buzov"); +MODULE_LICENSE("GPL"); diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c new file mode 100644 index 0000000..c916068 --- /dev/null +++ b/arch/arm/plat-stmp3xxx/timer.c @@ -0,0 +1,172 @@ +/* + * System timer for Freescale STMP37XX/STMP378X + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static irqreturn_t +stmp3xxx_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *c = dev_id; + + if (HW_TIMROT_TIMCTRLn_RD(0) & (1<<15)) { + HW_TIMROT_TIMCTRLn_CLR(0, (1<<15)); + c->event_handler(c); + } else if (HW_TIMROT_TIMCTRLn_RD(1) & (1<<15)) { + HW_TIMROT_TIMCTRLn_CLR(1, (1<<15)); + HW_TIMROT_TIMCTRLn_CLR(1, BM_TIMROT_TIMCTRLn_IRQ_EN); + HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF); + } + + return IRQ_HANDLED; +} + +static cycle_t stmp3xxx_clock_read(void) +{ + return ~((HW_TIMROT_TIMCOUNTn_RD(1) & 0xFFFF0000) >> 16); +} + +static int +stmp3xxx_timrot_set_next_event(unsigned long delta, + struct clock_event_device *dev) +{ + HW_TIMROT_TIMCOUNTn_WR(0, delta); /* reload */ + return 0; +} + +static void +stmp3xxx_timrot_set_mode(enum clock_event_mode mode, + struct clock_event_device *dev) +{ +} + +static struct clock_event_device ckevt_timrot = { + .name = "timrot", + .features = CLOCK_EVT_FEAT_ONESHOT, + .shift = 32, + .set_next_event = stmp3xxx_timrot_set_next_event, + .set_mode = stmp3xxx_timrot_set_mode, +}; + +static struct clocksource cksrc_stmp3xxx = { + .name = "cksrc_stmp3xxx", + .rating = 250, + .read = stmp3xxx_clock_read, + .mask = CLOCKSOURCE_MASK(16), + .shift = 10, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static struct irqaction stmp3xxx_timer_irq = { + .name = "stmp3xxx_timer", + .flags = IRQF_DISABLED | IRQF_TIMER, + .handler = stmp3xxx_timer_interrupt, + .dev_id = &ckevt_timrot, +}; + + +/* + * Set up timer interrupt, and return the current time in seconds. + */ +static void __init stmp3xxx_init_timer(void) +{ + cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE, + cksrc_stmp3xxx.shift); + ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, + ckevt_timrot.shift); + ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot); + ckevt_timrot.max_delta_ns = clockevent_delta2ns(0xFFF, &ckevt_timrot); + ckevt_timrot.cpumask = cpumask_of(0); + + HW_TIMROT_ROTCTRL_CLR(BM_TIMROT_ROTCTRL_SFTRST | + BM_TIMROT_ROTCTRL_CLKGATE); + HW_TIMROT_TIMCOUNTn_WR(0, 0); + HW_TIMROT_TIMCOUNTn_WR(1, 0); + + HW_TIMROT_TIMCTRLn_WR(0, + (BF_TIMROT_TIMCTRLn_SELECT(8) | /* 32 kHz */ + BF_TIMROT_TIMCTRLn_PRESCALE(0) | + BM_TIMROT_TIMCTRLn_RELOAD | + BM_TIMROT_TIMCTRLn_UPDATE | + BM_TIMROT_TIMCTRLn_IRQ_EN)); + HW_TIMROT_TIMCTRLn_WR(1, + (BF_TIMROT_TIMCTRLn_SELECT(8) | /* 32 kHz */ + BF_TIMROT_TIMCTRLn_PRESCALE(0) | + BM_TIMROT_TIMCTRLn_RELOAD | + BM_TIMROT_TIMCTRLn_UPDATE)); + + HW_TIMROT_TIMCOUNTn_WR(0, CLOCK_TICK_RATE / HZ - 1); + HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF); /* reload */ + + setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq); + + clocksource_register(&cksrc_stmp3xxx); + clockevents_register_device(&ckevt_timrot); +} + +#ifdef CONFIG_PM + +void stmp3xxx_suspend_timer(void) +{ + HW_TIMROT_TIMCTRLn_CLR(0, BM_TIMROT_TIMCTRLn_IRQ_EN); + HW_TIMROT_TIMCTRLn_CLR(0, (1<<15)); + HW_TIMROT_ROTCTRL_SET(BM_TIMROT_ROTCTRL_CLKGATE); +} + +void stmp3xxx_resume_timer(void) +{ + HW_TIMROT_ROTCTRL_CLR(BM_TIMROT_ROTCTRL_SFTRST | + BM_TIMROT_ROTCTRL_CLKGATE); + + + HW_TIMROT_TIMCTRLn_WR(0, + (BF_TIMROT_TIMCTRLn_SELECT(8) | /* 32 kHz */ + BF_TIMROT_TIMCTRLn_PRESCALE(0) | + BM_TIMROT_TIMCTRLn_UPDATE | + BM_TIMROT_TIMCTRLn_IRQ_EN)); + HW_TIMROT_TIMCTRLn_WR(1, + (BF_TIMROT_TIMCTRLn_SELECT(8) | /* 32 kHz */ + BF_TIMROT_TIMCTRLn_PRESCALE(0) | + BM_TIMROT_TIMCTRLn_RELOAD | + BM_TIMROT_TIMCTRLn_UPDATE)); + + HW_TIMROT_TIMCOUNTn_WR(0, CLOCK_TICK_RATE / HZ - 1); + HW_TIMROT_TIMCOUNTn_WR(1, 0xFFFF); /* reload */ +} + +#else + +#define stmp3xxx_suspend_timer NULL +#define stmp3xxx_resume_timer NULL + +#endif /* CONFIG_PM */ + +struct sys_timer stmp3xxx_timer = { + .init = stmp3xxx_init_timer, + .suspend = stmp3xxx_suspend_timer, + .resume = stmp3xxx_resume_timer, +}; -- cgit v0.10.2 From 45d9108011b9dfb4fccd6c258290d2185145709b Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Wed, 22 Apr 2009 23:57:05 +0100 Subject: [ARM] 5465/1: Freescale STMP platform support [7/10] Sources: support for 37xx boards Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/mach-stmp37xx/Makefile b/arch/arm/mach-stmp37xx/Makefile new file mode 100644 index 0000000..57deffd --- /dev/null +++ b/arch/arm/mach-stmp37xx/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_ARCH_STMP37XX) += stmp37xx.o +obj-$(CONFIG_MACH_STMP37XX) += stmp37xx_devb.o diff --git a/arch/arm/mach-stmp37xx/Makefile.boot b/arch/arm/mach-stmp37xx/Makefile.boot new file mode 100644 index 0000000..1568ad4 --- /dev/null +++ b/arch/arm/mach-stmp37xx/Makefile.boot @@ -0,0 +1,3 @@ + zreladdr-y := 0x40008000 +params_phys-y := 0x40000100 +initrd_phys-y := 0x40800000 diff --git a/arch/arm/mach-stmp37xx/stmp37xx.c b/arch/arm/mach-stmp37xx/stmp37xx.c new file mode 100644 index 0000000..83a41c9 --- /dev/null +++ b/arch/arm/mach-stmp37xx/stmp37xx.c @@ -0,0 +1,217 @@ +/* + * Freescale STMP37XX platform support + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include "stmp37xx.h" + +/* + * IRQ handling + */ +static void stmp37xx_ack_irq(unsigned int irq) +{ + /* Disable IRQ */ + HW_ICOLL_PRIORITYn_CLR(irq / 4, 0x04 << ((irq % 4) * 8)); + + /* ACK current interrupt */ + HW_ICOLL_LEVELACK_WR(1); + + /* Barrier */ + (void) HW_ICOLL_STAT_RD(); +} + +static void stmp37xx_mask_irq(unsigned int irq) +{ + /* IRQ disable */ + HW_ICOLL_PRIORITYn_CLR(irq / 4, 0x04 << ((irq % 4) * 8)); +} + +static void stmp37xx_unmask_irq(unsigned int irq) +{ + /* IRQ enable */ + HW_ICOLL_PRIORITYn_SET(irq / 4, 0x04 << ((irq % 4) * 8)); +} + +static struct irq_chip stmp37xx_chip = { + .ack = stmp37xx_ack_irq, + .mask = stmp37xx_mask_irq, + .unmask = stmp37xx_unmask_irq, +}; + +void __init stmp37xx_init_irq(void) +{ + stmp3xxx_init_irq(&stmp37xx_chip); +} + +/* + * DMA interrupt handling + */ +void stmp3xxx_arch_dma_enable_interrupt(int channel) +{ + int dmabus = channel / 16; + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + HW_APBH_CTRL1_SET(1 << (8 + (channel % 16))); + break; + + case STMP3XXX_BUS_APBX: + HW_APBX_CTRL1_SET(1 << (8 + (channel % 16))); + break; + } +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_enable_interrupt); + +void stmp3xxx_arch_dma_clear_interrupt(int channel) +{ + int dmabus = channel / 16; + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + HW_APBH_CTRL1_CLR(1 << (channel % 16)); + break; + + case STMP3XXX_BUS_APBX: + HW_APBX_CTRL1_CLR(1 << (channel % 16)); + break; + } +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_clear_interrupt); + +int stmp3xxx_arch_dma_is_interrupt(int channel) +{ + int r = 0; + + int dmabus = channel / 16; + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + r = HW_APBH_CTRL1_RD() & (1 << (channel % 16)); + break; + + case STMP3XXX_BUS_APBX: + r = HW_APBX_CTRL1_RD() & (1 << (channel % 16)); + break; + } + return r; +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_is_interrupt); + +void stmp3xxx_arch_dma_reset_channel(int channel) +{ + int dmabus = channel / 16; + unsigned chbit = 1 << (channel % 16); + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + /* Reset channel and wait for it to complete */ + HW_APBH_CTRL0_SET(chbit << BP_APBH_CTRL0_RESET_CHANNEL); + while (HW_APBH_CTRL0_RD() & + (chbit << BP_APBH_CTRL0_RESET_CHANNEL)) + continue; + break; + + case STMP3XXX_BUS_APBX: + /* Reset channel and wait for it to complete */ + HW_APBX_CTRL0_SET(chbit << BP_APBX_CTRL0_RESET_CHANNEL); + while (HW_APBX_CTRL0_RD() & + (chbit << BP_APBX_CTRL0_RESET_CHANNEL)) + continue; + break; + } +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_reset_channel); + +void stmp3xxx_arch_dma_freeze(int channel) +{ + int dmabus = channel / 16; + unsigned chbit = 1 << (channel % 16); + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + HW_APBH_CTRL0_SET(1< + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __MACH_STMP37XX_H +#define __MACH_STMP37XX_H + +void stmp37xx_map_io(void); +void stmp37xx_init_irq(void); + +#endif /* __MACH_STMP37XX_H */ diff --git a/arch/arm/mach-stmp37xx/stmp37xx_devb.c b/arch/arm/mach-stmp37xx/stmp37xx_devb.c new file mode 100644 index 0000000..adfbdc7 --- /dev/null +++ b/arch/arm/mach-stmp37xx/stmp37xx_devb.c @@ -0,0 +1,83 @@ +/* + * Freescale STMP37XX development board support + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "stmp37xx.h" + +/* + * List of STMP37xx development board specific devices + */ +static struct platform_device *stmp37xx_devb_devices[] = { + &stmp3xxx_dbguart, +}; + +static struct pin_desc dbguart_pins_0[] = { + { PINID_PWM0, PIN_FUN3, }, + { PINID_PWM1, PIN_FUN3, }, +}; + +static struct pin_group dbguart_pins[] = { + [0] = { + .pins = dbguart_pins_0, + .nr_pins = ARRAY_SIZE(dbguart_pins_0), + }, +}; + +static int dbguart_pins_control(int id, int request) +{ + int r = 0; + + if (request) + r = stmp3xxx_request_pin_group(&dbguart_pins[id], "debug uart"); + else + stmp3xxx_release_pin_group(&dbguart_pins[id], "debug uart"); + return r; +} + + +static void __init stmp37xx_devb_init(void) +{ + stmp3xxx_pinmux_init(NR_REAL_IRQS); + + /* Init STMP3xxx platform */ + stmp3xxx_init(); + + stmp3xxx_dbguart.dev.platform_data = dbguart_pins_control; + /* Add STMP37xx development board devices */ + platform_add_devices(stmp37xx_devb_devices, + ARRAY_SIZE(stmp37xx_devb_devices)); +} + +MACHINE_START(STMP37XX, "STMP37XX") + .phys_io = 0x80000000, + .io_pg_offst = ((0xf0000000) >> 18) & 0xfffc, + .boot_params = 0x40000100, + .map_io = stmp37xx_map_io, + .init_irq = stmp37xx_init_irq, + .timer = &stmp3xxx_timer, + .init_machine = stmp37xx_devb_init, +MACHINE_END -- cgit v0.10.2 From bc19d892a14cbb31d838813b2225e262a6c01341 Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Wed, 22 Apr 2009 23:57:28 +0100 Subject: [ARM] 5464/1: Freescale STMP platform support [7/10] Sources: support for 378x boards Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/mach-stmp378x/Makefile b/arch/arm/mach-stmp378x/Makefile new file mode 100644 index 0000000..d156f76 --- /dev/null +++ b/arch/arm/mach-stmp378x/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_ARCH_STMP378X) += stmp378x.o +obj-$(CONFIG_MACH_STMP378X) += stmp378x_devb.o diff --git a/arch/arm/mach-stmp378x/Makefile.boot b/arch/arm/mach-stmp378x/Makefile.boot new file mode 100644 index 0000000..1568ad4 --- /dev/null +++ b/arch/arm/mach-stmp378x/Makefile.boot @@ -0,0 +1,3 @@ + zreladdr-y := 0x40008000 +params_phys-y := 0x40000100 +initrd_phys-y := 0x40800000 diff --git a/arch/arm/mach-stmp378x/stmp378x.c b/arch/arm/mach-stmp378x/stmp378x.c new file mode 100644 index 0000000..f156ec7 --- /dev/null +++ b/arch/arm/mach-stmp378x/stmp378x.c @@ -0,0 +1,225 @@ +/* + * Freescale STMP378X platform support + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stmp378x.h" +/* + * IRQ handling + */ +static void stmp378x_ack_irq(unsigned int irq) +{ + /* Tell ICOLL to release IRQ line */ + HW_ICOLL_VECTOR_WR(0x0); + + /* ACK current interrupt */ + HW_ICOLL_LEVELACK_WR(BV_ICOLL_LEVELACK_IRQLEVELACK__LEVEL0); + + /* Barrier */ + (void) HW_ICOLL_STAT_RD(); +} + +static void stmp378x_mask_irq(unsigned int irq) +{ + /* IRQ disable */ + HW_ICOLL_INTERRUPTn_CLR(irq, BM_ICOLL_INTERRUPTn_ENABLE); +} + +static void stmp378x_unmask_irq(unsigned int irq) +{ + /* IRQ enable */ + HW_ICOLL_INTERRUPTn_SET(irq, BM_ICOLL_INTERRUPTn_ENABLE); +} + +static struct irq_chip stmp378x_chip = { + .ack = stmp378x_ack_irq, + .mask = stmp378x_mask_irq, + .unmask = stmp378x_unmask_irq, +}; + +void __init stmp378x_init_irq(void) +{ + stmp3xxx_init_irq(&stmp378x_chip); +} + +/* + * DMA interrupt handling + */ +void stmp3xxx_arch_dma_enable_interrupt(int channel) +{ + int dmabus = channel / 16; + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + HW_APBH_CTRL1_SET(1 << (16 + (channel % 16))); + HW_APBH_CTRL2_SET(1 << (16 + (channel % 16))); + break; + + case STMP3XXX_BUS_APBX: + HW_APBX_CTRL1_SET(1 << (16 + (channel % 16))); + HW_APBX_CTRL2_SET(1 << (16 + (channel % 16))); + break; + } +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_enable_interrupt); + +void stmp3xxx_arch_dma_clear_interrupt(int channel) +{ + int dmabus = channel / 16; + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + HW_APBH_CTRL1_CLR(1 << (channel % 16)); + HW_APBH_CTRL2_CLR(1 << (channel % 16)); + break; + + case STMP3XXX_BUS_APBX: + HW_APBX_CTRL1_CLR(1 << (channel % 16)); + HW_APBX_CTRL2_CLR(1 << (channel % 16)); + break; + } +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_clear_interrupt); + +int stmp3xxx_arch_dma_is_interrupt(int channel) +{ + int dmabus = channel / 16; + int r = 0; + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + r = HW_APBH_CTRL1_RD() & (1 << (channel % 16)); + break; + + case STMP3XXX_BUS_APBX: + r = HW_APBX_CTRL1_RD() & (1 << (channel % 16)); + break; + } + return r; +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_is_interrupt); + +void stmp3xxx_arch_dma_reset_channel(int channel) +{ + int dmabus = channel / 16; + unsigned chbit = 1 << (channel % 16); + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + /* Reset channel and wait for it to complete */ + HW_APBH_CTRL0_SET(chbit << + BP_APBH_CTRL0_RESET_CHANNEL); + while (HW_APBH_CTRL0_RD() & + (chbit << BP_APBH_CTRL0_RESET_CHANNEL)) + continue; + break; + + case STMP3XXX_BUS_APBX: + /* Reset channel and wait for it to complete */ + HW_APBX_CHANNEL_CTRL_SET( + BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(chbit)); + while (HW_APBX_CHANNEL_CTRL_RD() & + BF_APBX_CHANNEL_CTRL_RESET_CHANNEL(chbit)) + continue; + break; + } +} +EXPORT_SYMBOL(stmp3xxx_arch_dma_reset_channel); + +void stmp3xxx_arch_dma_freeze(int channel) +{ + int dmabus = channel / 16; + unsigned chbit = 1 << (channel % 16); + + switch (dmabus) { + case STMP3XXX_BUS_APBH: + HW_APBH_CTRL0_SET(1< + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#ifndef __MACH_STMP378X_H +#define __MACH_STMP378X_H + +void stmp378x_map_io(void); +void stmp378x_init_irq(void); + +#endif /* __MACH_STMP378X_COMMON_H */ diff --git a/arch/arm/mach-stmp378x/stmp378x_devb.c b/arch/arm/mach-stmp378x/stmp378x_devb.c new file mode 100644 index 0000000..bc643f6 --- /dev/null +++ b/arch/arm/mach-stmp378x/stmp378x_devb.c @@ -0,0 +1,80 @@ +/* + * Freescale STMP378X development board support + * + * Embedded Alley Solutions, Inc + * + * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. + * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include "stmp378x.h" + +static struct platform_device *devices[] = { + &stmp3xxx_dbguart, +}; + +static struct pin_desc dbguart_pins_0[] = { + { PINID_PWM0, PIN_FUN3, }, + { PINID_PWM1, PIN_FUN3, }, +}; + +static struct pin_group dbguart_pins[] = { + [0] = { + .pins = dbguart_pins_0, + .nr_pins = ARRAY_SIZE(dbguart_pins_0), + }, +}; + +static int dbguart_pins_control(int id, int request) +{ + int r = 0; + + if (request) + r = stmp3xxx_request_pin_group(&dbguart_pins[id], "debug uart"); + else + stmp3xxx_release_pin_group(&dbguart_pins[id], "debug uart"); + return r; +} + +static void __init stmp378x_devb_init(void) +{ + stmp3xxx_pinmux_init(NR_REAL_IRQS); + + /* init stmp3xxx platform */ + stmp3xxx_init(); + + stmp3xxx_dbguart.dev.platform_data = dbguart_pins_control; + + /* add board's devices */ + platform_add_devices(devices, ARRAY_SIZE(devices)); +} + +MACHINE_START(STMP378X, "STMP378X") + .phys_io = 0x80000000, + .io_pg_offst = ((0xf0000000) >> 18) & 0xfffc, + .boot_params = 0x40000100, + .map_io = stmp378x_map_io, + .init_irq = stmp378x_init_irq, + .timer = &stmp3xxx_timer, + .init_machine = stmp378x_devb_init, +MACHINE_END -- cgit v0.10.2 From bf155f8defdb5be5da0653279327bbd3801a3ca9 Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Thu, 23 Apr 2009 00:00:01 +0100 Subject: [ARM] 5463/1: Freescale STMP platform support [10/10] Default configs for STMP3xxx boards Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/configs/stmp378x_defconfig b/arch/arm/configs/stmp378x_defconfig new file mode 100644 index 0000000..44461f1 --- /dev/null +++ b/arch/arm/configs/stmp378x_defconfig @@ -0,0 +1,1141 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.30-rc2 +# Thu Apr 23 02:44:13 2009 +# +CONFIG_ARM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_GENERIC_GPIO=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_MMU=y +# CONFIG_NO_IOPORT is not set +CONFIG_GENERIC_HARDIRQS=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="-default" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_RELAY=y +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_INITRAMFS_ROOT_UID=0 +CONFIG_INITRAMFS_ROOT_GID=0 +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_INITRAMFS_COMPRESSION_NONE is not set +CONFIG_INITRAMFS_COMPRESSION_GZIP=y +# CONFIG_INITRAMFS_COMPRESSION_BZIP2 is not set +# CONFIG_INITRAMFS_COMPRESSION_LZMA is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_EMBEDDED=y +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_STRIP_ASM_SYMS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_AIO=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_COMPAT_BRK=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_CLK=y +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLOCK=y +CONFIG_LBD=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_FREEZER is not set + +# +# System Type +# +# CONFIG_ARCH_AAEC2000 is not set +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_GEMINI is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IMX is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_L7200 is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_NS9XXX is not set +# CONFIG_ARCH_LOKI is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_PNX4008 is not set +# CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_LH7A40X is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_W90X900 is not set +CONFIG_ARCH_STMP3XXX=y + +# +# Freescale STMP3xxx implementations +# +# CONFIG_ARCH_STMP37XX is not set +CONFIG_ARCH_STMP378X=y +# CONFIG_MACH_STMP37XX is not set +CONFIG_MACH_STMP378X=y + +# +# Processor Type +# +CONFIG_CPU_32=y +CONFIG_CPU_ARM926T=y +CONFIG_CPU_32v5=y +CONFIG_CPU_ABRT_EV5TJ=y +CONFIG_CPU_PABRT_NOIFAR=y +CONFIG_CPU_CACHE_VIVT=y +CONFIG_CPU_COPY_V4WB=y +CONFIG_CPU_TLB_V4WBI=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_WRITETHROUGH is not set +# CONFIG_CPU_CACHE_ROUND_ROBIN is not set +# CONFIG_OUTER_CACHE is not set +CONFIG_COMMON_CLKDEV=y + +# +# Bus support +# +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_PREEMPT=y +CONFIG_HZ=100 +CONFIG_AEABI=y +CONFIG_OABI_COMPAT=y +CONFIG_ARCH_FLATMEM_HAS_HOLES=y +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +CONFIG_HIGHMEM=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +CONFIG_ALIGNMENT_TRAP=y + +# +# Boot options +# +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="console=ttySDBG0,115200 mem=32M" +# CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set + +# +# CPU Power Management +# +# CONFIG_CPU_IDLE is not set + +# +# Floating point emulation +# + +# +# At least one emulation must be selected +# +# CONFIG_FPE_NWFPE is not set +# CONFIG_FPE_FASTFPE is not set +# CONFIG_VFP is not set + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +# CONFIG_PM is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETLABEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set + +# +# Classification +# +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_EMATCH is not set +# CONFIG_NET_CLS_ACT is not set +CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_NET_DROP_MONITOR is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +CONFIG_FIB_RULES=y +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_STANDALONE is not set +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +CONFIG_MTD=y +# CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_CONCAT is not set +# CONFIG_MTD_PARTITIONS is not set +# CONFIG_MTD_TESTS is not set + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=y +# CONFIG_MTD_BLKDEVS is not set +# CONFIG_MTD_BLOCK is not set +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set + +# +# RAM/ROM/Flash chip drivers +# +# CONFIG_MTD_CFI is not set +# CONFIG_MTD_JEDECPROBE is not set +CONFIG_MTD_MAP_BANK_WIDTH_1=y +CONFIG_MTD_MAP_BANK_WIDTH_2=y +CONFIG_MTD_MAP_BANK_WIDTH_4=y +# CONFIG_MTD_MAP_BANK_WIDTH_8 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_16 is not set +# CONFIG_MTD_MAP_BANK_WIDTH_32 is not set +CONFIG_MTD_CFI_I1=y +CONFIG_MTD_CFI_I2=y +# CONFIG_MTD_CFI_I4 is not set +# CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +# CONFIG_MTD_ABSENT is not set + +# +# Mapping drivers for chip access +# +# CONFIG_MTD_COMPLEX_MAPPINGS is not set +# CONFIG_MTD_PLATRAM is not set + +# +# Self-contained MTD device drivers +# +# CONFIG_MTD_SLRAM is not set +# CONFIG_MTD_PHRAM is not set +# CONFIG_MTD_MTDRAM is not set +# CONFIG_MTD_BLOCK2MTD is not set + +# +# Disk-On-Chip Device Drivers +# +# CONFIG_MTD_DOC2000 is not set +# CONFIG_MTD_DOC2001 is not set +# CONFIG_MTD_DOC2001PLUS is not set +CONFIG_MTD_NAND=y +# CONFIG_MTD_NAND_VERIFY_WRITE is not set +# CONFIG_MTD_NAND_ECC_SMC is not set +# CONFIG_MTD_NAND_MUSEUM_IDS is not set +# CONFIG_MTD_NAND_GPIO is not set +CONFIG_MTD_NAND_IDS=y +# CONFIG_MTD_NAND_DISKONCHIP is not set +# CONFIG_MTD_NAND_PLATFORM is not set +# CONFIG_MTD_ONENAND is not set + +# +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set + +# +# UBI - Unsorted block images +# +CONFIG_MTD_UBI=y +CONFIG_MTD_UBI_WL_THRESHOLD=4096 +CONFIG_MTD_UBI_BEB_RESERVE=1 +CONFIG_MTD_UBI_GLUEBI=y + +# +# UBI debugging options +# +# CONFIG_MTD_UBI_DEBUG is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=y +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=4 +CONFIG_BLK_DEV_RAM_SIZE=6144 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +# CONFIG_NETDEVICES is not set +# CONFIG_ISDN is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +CONFIG_INPUT_POLLDEV=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=320 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=240 +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_GPIO is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_AD7879 is not set +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_UINPUT is not set +# CONFIG_INPUT_GPIO_ROTARY_ENCODER is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_LIBPS2 is not set +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +CONFIG_HW_RANDOM=y +# CONFIG_HW_RANDOM_TIMERIOMEM is not set +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_I2C is not set +# CONFIG_SPI is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y + +# +# Memory mapped GPIO expanders: +# + +# +# I2C GPIO expanders: +# + +# +# PCI GPIO expanders: +# + +# +# SPI GPIO expanders: +# +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +CONFIG_VIDEO_DEV=y +CONFIG_VIDEO_V4L2_COMMON=y +# CONFIG_VIDEO_ALLOW_V4L1 is not set +# CONFIG_VIDEO_V4L1_COMPAT is not set +# CONFIG_DVB_CORE is not set +CONFIG_VIDEO_MEDIA=y + +# +# Multimedia drivers +# +# CONFIG_MEDIA_ATTACH is not set +CONFIG_VIDEO_V4L2=y +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set + +# +# Encoders/decoders and other helper chips +# + +# +# Audio decoders +# + +# +# RDS decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# +# CONFIG_VIDEO_CX2341X is not set + +# +# Video encoders +# + +# +# Video improvement chips +# +# CONFIG_VIDEO_VIVI is not set +# CONFIG_SOC_CAMERA is not set +# CONFIG_RADIO_ADAPTERS is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +# CONFIG_FB_CFB_FILLRECT is not set +# CONFIG_FB_CFB_COPYAREA is not set +# CONFIG_FB_CFB_IMAGEBLIT is not set +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +# CONFIG_LCD_ILI9320 is not set +# CONFIG_LCD_PLATFORM is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BACKLIGHT_GENERIC=y + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_NEW_LEDS is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_REGULATOR is not set +# CONFIG_UIO is not set +# CONFIG_STAGING is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +CONFIG_FILE_LOCKING=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_CONFIGFS_FS=m +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_NETWORK_FILESYSTEMS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_NLS is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_SELFTEST=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SLAB_LEAK=y +CONFIG_DEBUG_PREEMPT=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_PI_LIST=y +# CONFIG_RT_MUTEX_TESTER is not set +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_LOCKDEP is not set +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_SPINLOCK_SLEEP=y +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +CONFIG_DEBUG_KOBJECT=y +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_DEBUG_INFO=y +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +# CONFIG_PAGE_POISONING is not set +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y +CONFIG_TRACING_SUPPORT=y + +# +# Tracers +# +CONFIG_FUNCTION_TRACER=y +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +CONFIG_CONTEXT_SWITCH_TRACER=y +# CONFIG_EVENT_TRACER is not set +CONFIG_BOOT_TRACER=y +# CONFIG_TRACE_BRANCH_PROFILING is not set +CONFIG_STACK_TRACER=y +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_FTRACE_STARTUP_TEST is not set +# CONFIG_DYNAMIC_DEBUG is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +CONFIG_ARM_UNWIND=y +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_ERRORS is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_LL is not set + +# +# Security options +# +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_NETWORK is not set +# CONFIG_SECURITY_PATH is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0 +# CONFIG_SECURITY_TOMOYO is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +CONFIG_CRYPTO_WORKQUEUE=y +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +CONFIG_CRYPTO_TEST=m + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=m +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=y +# CONFIG_CRYPTO_ZLIB is not set +CONFIG_CRYPTO_LZO=y + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_HW=y +CONFIG_BINARY_PRINTF=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/arm/configs/stmp37xx_defconfig b/arch/arm/configs/stmp37xx_defconfig new file mode 100644 index 0000000..401279d --- /dev/null +++ b/arch/arm/configs/stmp37xx_defconfig @@ -0,0 +1,1002 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.29.1 +# Mon Apr 20 04:41:26 2009 +# +CONFIG_ARM=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_GENERIC_GPIO=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_MMU=y +# CONFIG_NO_IOPORT is not set +CONFIG_GENERIC_HARDIRQS=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_ARCH_HAS_ILOG2_U32 is not set +# CONFIG_ARCH_HAS_ILOG2_U64 is not set +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_BROKEN_ON_SMP=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="-default" +CONFIG_LOCALVERSION_AUTO=y +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +# CONFIG_BSD_PROCESS_ACCT_V3 is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=17 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_RELAY=y +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_INITRAMFS_ROOT_UID=0 +CONFIG_INITRAMFS_ROOT_GID=0 +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_EMBEDDED=y +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_AIO=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_COMPAT_BRK=y +CONFIG_SLAB=y +# CONFIG_SLUB is not set +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_CLK=y +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_BLOCK=y +CONFIG_LBD=y +CONFIG_BLK_DEV_IO_TRACE=y +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +CONFIG_DEFAULT_CFQ=y +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_FREEZER is not set + +# +# System Type +# +# CONFIG_ARCH_AAEC2000 is not set +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_AT91 is not set +# CONFIG_ARCH_CLPS711X is not set +# CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set +# CONFIG_ARCH_FOOTBRIDGE is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_IMX is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set +# CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_L7200 is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_NS9XXX is not set +# CONFIG_ARCH_LOKI is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_PNX4008 is not set +# CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_RPC is not set +# CONFIG_ARCH_SA1100 is not set +# CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_SHARK is not set +# CONFIG_ARCH_LH7A40X is not set +# CONFIG_ARCH_DAVINCI is not set +# CONFIG_ARCH_OMAP is not set +# CONFIG_ARCH_MSM is not set +# CONFIG_ARCH_W90X900 is not set +CONFIG_ARCH_STMP3XXX=y + +# +# Freescale STMP3xxx implementations +# +CONFIG_ARCH_STMP37XX=y +# CONFIG_ARCH_STMP378X is not set +CONFIG_MACH_STMP37XX=y +# CONFIG_MACH_STMP378X is not set + +# +# Processor Type +# +CONFIG_CPU_32=y +CONFIG_CPU_ARM926T=y +CONFIG_CPU_32v5=y +CONFIG_CPU_ABRT_EV5TJ=y +CONFIG_CPU_PABRT_NOIFAR=y +CONFIG_CPU_CACHE_VIVT=y +CONFIG_CPU_COPY_V4WB=y +CONFIG_CPU_TLB_V4WBI=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y + +# +# Processor Features +# +CONFIG_ARM_THUMB=y +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_WRITETHROUGH is not set +# CONFIG_CPU_CACHE_ROUND_ROBIN is not set +# CONFIG_OUTER_CACHE is not set +CONFIG_COMMON_CLKDEV=y + +# +# Bus support +# +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set + +# +# Kernel Features +# +CONFIG_TICK_ONESHOT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_PREEMPT=y +CONFIG_HZ=100 +CONFIG_AEABI=y +CONFIG_OABI_COMPAT=y +CONFIG_ARCH_FLATMEM_HAS_HOLES=y +# CONFIG_ARCH_SPARSEMEM_DEFAULT is not set +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +CONFIG_ALIGNMENT_TRAP=y + +# +# Boot options +# +CONFIG_ZBOOT_ROM_TEXT=0x0 +CONFIG_ZBOOT_ROM_BSS=0x0 +CONFIG_CMDLINE="console=ttySDBG0,115200 mem=32M lcd_panel=lms350 rdinit=/bin/sh ignore_loglevel" +# CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set + +# +# CPU Power Management +# +# CONFIG_CPU_IDLE is not set + +# +# Floating point emulation +# + +# +# At least one emulation must be selected +# +# CONFIG_FPE_NWFPE is not set +# CONFIG_FPE_FASTFPE is not set +# CONFIG_VFP is not set + +# +# Userspace binary formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_HAVE_AOUT=y +# CONFIG_BINFMT_AOUT is not set +# CONFIG_BINFMT_MISC is not set + +# +# Power management options +# +# CONFIG_PM is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +CONFIG_COMPAT_NET_DEV_OPS=y +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_ASK_IP_FIB_HASH=y +# CONFIG_IP_FIB_TRIE is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETLABEL is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +CONFIG_NET_SCHED=y + +# +# Queueing/Scheduling +# +# CONFIG_NET_SCH_CBQ is not set +# CONFIG_NET_SCH_HTB is not set +# CONFIG_NET_SCH_HFSC is not set +# CONFIG_NET_SCH_PRIO is not set +# CONFIG_NET_SCH_MULTIQ is not set +# CONFIG_NET_SCH_RED is not set +# CONFIG_NET_SCH_SFQ is not set +# CONFIG_NET_SCH_TEQL is not set +# CONFIG_NET_SCH_TBF is not set +# CONFIG_NET_SCH_GRED is not set +# CONFIG_NET_SCH_DSMARK is not set +# CONFIG_NET_SCH_NETEM is not set +# CONFIG_NET_SCH_DRR is not set + +# +# Classification +# +# CONFIG_NET_CLS_BASIC is not set +# CONFIG_NET_CLS_TCINDEX is not set +# CONFIG_NET_CLS_ROUTE4 is not set +# CONFIG_NET_CLS_FW is not set +# CONFIG_NET_CLS_U32 is not set +# CONFIG_NET_CLS_RSVP is not set +# CONFIG_NET_CLS_RSVP6 is not set +# CONFIG_NET_CLS_FLOW is not set +# CONFIG_NET_EMATCH is not set +# CONFIG_NET_CLS_ACT is not set +CONFIG_NET_SCH_FIFO=y +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_PHONET is not set +CONFIG_FIB_RULES=y +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_STANDALONE is not set +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set +# CONFIG_MTD is not set +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=y +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=4 +CONFIG_BLK_DEV_RAM_SIZE=6144 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +CONFIG_SCSI=y +CONFIG_SCSI_DMA=y +# CONFIG_SCSI_TGT is not set +# CONFIG_SCSI_NETLINK is not set +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=y +# CONFIG_CHR_DEV_ST is not set +# CONFIG_CHR_DEV_OSST is not set +# CONFIG_BLK_DEV_SR is not set +CONFIG_CHR_DEV_SG=y +# CONFIG_CHR_DEV_SCH is not set + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +# CONFIG_SCSI_MULTI_LUN is not set +# CONFIG_SCSI_CONSTANTS is not set +# CONFIG_SCSI_LOGGING is not set +# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_WAIT_SCAN=m + +# +# SCSI Transports +# +# CONFIG_SCSI_SPI_ATTRS is not set +# CONFIG_SCSI_FC_ATTRS is not set +# CONFIG_SCSI_ISCSI_ATTRS is not set +# CONFIG_SCSI_SAS_LIBSAS is not set +# CONFIG_SCSI_SRP_ATTRS is not set +# CONFIG_SCSI_LOWLEVEL is not set +# CONFIG_SCSI_DH is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +# CONFIG_NETDEVICES is not set +# CONFIG_ISDN is not set + +# +# Input device support +# +CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +CONFIG_INPUT_POLLDEV=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=320 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=240 +# CONFIG_INPUT_JOYDEV is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_EVBUG is not set + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +# CONFIG_KEYBOARD_ATKBD is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_GPIO is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set +CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_TOUCHSCREEN_FUJITSU is not set +# CONFIG_TOUCHSCREEN_GUNZE is not set +# CONFIG_TOUCHSCREEN_ELO is not set +# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set +# CONFIG_TOUCHSCREEN_MTOUCH is not set +# CONFIG_TOUCHSCREEN_INEXIO is not set +# CONFIG_TOUCHSCREEN_MK712 is not set +# CONFIG_TOUCHSCREEN_PENMOUNT is not set +# CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set +# CONFIG_TOUCHSCREEN_TOUCHWIN is not set +# CONFIG_TOUCHSCREEN_TOUCHIT213 is not set +CONFIG_INPUT_MISC=y +# CONFIG_INPUT_UINPUT is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_LIBPS2 is not set +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_VT_HW_CONSOLE_BINDING=y +CONFIG_DEVKMEM=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +# CONFIG_SERIAL_8250 is not set + +# +# Non-8250 serial port support +# +# CONFIG_SERIAL_STMP_DBG is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_IPMI_HANDLER is not set +CONFIG_HW_RANDOM=y +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_I2C is not set +# CONFIG_SPI is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +CONFIG_DEBUG_GPIO=y +CONFIG_GPIO_SYSFS=y + +# +# Memory mapped GPIO expanders: +# + +# +# I2C GPIO expanders: +# + +# +# PCI GPIO expanders: +# + +# +# SPI GPIO expanders: +# +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_THERMAL_HWMON is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +CONFIG_VIDEO_DEV=y +CONFIG_VIDEO_V4L2_COMMON=y +# CONFIG_VIDEO_ALLOW_V4L1 is not set +# CONFIG_VIDEO_V4L1_COMPAT is not set +# CONFIG_DVB_CORE is not set +CONFIG_VIDEO_MEDIA=y + +# +# Multimedia drivers +# +# CONFIG_MEDIA_ATTACH is not set +CONFIG_VIDEO_V4L2=y +CONFIG_VIDEO_CAPTURE_DRIVERS=y +# CONFIG_VIDEO_ADV_DEBUG is not set +# CONFIG_VIDEO_FIXED_MINOR_RANGES is not set +# CONFIG_VIDEO_HELPER_CHIPS_AUTO is not set + +# +# Encoders/decoders and other helper chips +# + +# +# Audio decoders +# + +# +# Video decoders +# + +# +# Video and audio decoders +# + +# +# MPEG video encoders +# +# CONFIG_VIDEO_CX2341X is not set + +# +# Video encoders +# + +# +# Video improvement chips +# +# CONFIG_VIDEO_VIVI is not set +# CONFIG_SOC_CAMERA is not set +# CONFIG_RADIO_ADAPTERS is not set +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set +# CONFIG_FB_CFB_FILLRECT is not set +# CONFIG_FB_CFB_COPYAREA is not set +# CONFIG_FB_CFB_IMAGEBLIT is not set +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set +# CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set +# CONFIG_FB_MODE_HELPERS is not set +# CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# +# CONFIG_FB_S1D13XXX is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +CONFIG_BACKLIGHT_LCD_SUPPORT=y +CONFIG_LCD_CLASS_DEVICE=y +# CONFIG_LCD_ILI9320 is not set +# CONFIG_LCD_PLATFORM is not set +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BACKLIGHT_GENERIC=y + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set + +# +# Console display driver support +# +# CONFIG_VGA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +# CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY is not set +# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_LOGO=y +CONFIG_LOGO_LINUX_MONO=y +CONFIG_LOGO_LINUX_VGA16=y +CONFIG_LOGO_LINUX_CLUT224=y +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_USB_ARCH_HAS_EHCI=y +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_ACCESSIBILITY is not set +# CONFIG_NEW_LEDS is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_REGULATOR is not set +# CONFIG_UIO is not set +# CONFIG_STAGING is not set + +# +# File systems +# +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +CONFIG_FILE_LOCKING=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY is not set +# CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +# CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_CONFIGFS_FS=m +# CONFIG_MISC_FILESYSTEMS is not set +# CONFIG_NETWORK_FILESYSTEMS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_NLS is not set +# CONFIG_DLM is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +# CONFIG_ENABLE_MUST_CHECK is not set +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_SCHED_DEBUG=y +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_DEBUG_SLAB is not set +CONFIG_DEBUG_PREEMPT=y +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_LOCK_ALLOC is not set +# CONFIG_PROVE_LOCKING is not set +# CONFIG_LOCK_STAT is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_BUGVERBOSE is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +CONFIG_FRAME_POINTER=y +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_NOP_TRACER=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_RING_BUFFER=y +CONFIG_TRACING=y + +# +# Tracers +# +CONFIG_FUNCTION_TRACER=y +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_PREEMPT_TRACER is not set +# CONFIG_SCHED_TRACER is not set +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_BOOT_TRACER=y +# CONFIG_TRACE_BRANCH_PROFILING is not set +CONFIG_STACK_TRACER=y +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +# CONFIG_DEBUG_USER is not set +# CONFIG_DEBUG_ERRORS is not set +# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_DEBUG_LL=y +# CONFIG_DEBUG_ICEDCC is not set + +# +# Security options +# +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_NETWORK is not set +# CONFIG_SECURITY_PATH is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=0 +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +CONFIG_CRYPTO_TEST=m + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +CONFIG_CRYPTO_HMAC=y +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +CONFIG_CRYPTO_SHA1=m +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_LZO=y + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_HW=y + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +CONFIG_CRC_CCITT=m +CONFIG_CRC16=y +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_LZO_COMPRESS=y +CONFIG_LZO_DECOMPRESS=y +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y -- cgit v0.10.2 From 7bd0f2f5fc5f51d351228488dfef5e33d28e8d0c Mon Sep 17 00:00:00 2001 From: dmitry pervushin Date: Mon, 27 Apr 2009 10:35:04 +0100 Subject: [ARM] 5483/1: Freescale STMP: add Kconfig/Makefile entries Added Kconfig/Makefile entries for STMP platform Signed-off-by: dmitry pervushin Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a930e5c..d17c801 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -302,6 +302,19 @@ config ARCH_MXC help Support for Freescale MXC/iMX-based family of processors +config ARCH_STMP3XXX + bool "Freescale STMP3xxx" + select CPU_ARM926T + select HAVE_CLK + select COMMON_CLKDEV + select ARCH_REQUIRE_GPIOLIB + select GENERIC_TIME + select GENERIC_CLOCKEVENTS + select GENERIC_GPIO + select USB_ARCH_HAS_EHCI + help + Support for systems based on the Freescale 3xxx CPUs. + config ARCH_NETX bool "Hilscher NetX based" select CPU_ARM926T @@ -679,6 +692,8 @@ source "arch/arm/mach-s3c6400/Kconfig" source "arch/arm/mach-s3c6410/Kconfig" endif +source "arch/arm/plat-stmp3xxx/Kconfig" + source "arch/arm/mach-lh7a40x/Kconfig" source "arch/arm/mach-imx/Kconfig" diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 885a837..9a0af47 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -149,6 +149,8 @@ machine-$(CONFIG_ARCH_S3C24A0) := s3c24a0 machine-$(CONFIG_ARCH_S3C64XX) := s3c6400 s3c6410 machine-$(CONFIG_ARCH_SA1100) := sa1100 machine-$(CONFIG_ARCH_SHARK) := shark +machine-$(CONFIG_ARCH_STMP378X) := stmp378x +machine-$(CONFIG_ARCH_STMP37XX) := stmp37xx machine-$(CONFIG_ARCH_VERSATILE) := versatile machine-$(CONFIG_ARCH_W90X900) := w90x900 machine-$(CONFIG_FOOTBRIDGE) := footbridge @@ -162,6 +164,7 @@ plat-$(CONFIG_PLAT_ORION) := orion plat-$(CONFIG_PLAT_PXA) := pxa plat-$(CONFIG_PLAT_S3C24XX) := s3c24xx s3c plat-$(CONFIG_PLAT_S3C64XX) := s3c64xx s3c +plat-$(CONFIG_ARCH_STMP3XXX) := stmp3xxx ifeq ($(CONFIG_ARCH_EBSA110),y) # This is what happens if you forget the IOCS16 line. -- cgit v0.10.2 From 4a79ba34cada6a5a4ee86ed53aa8a73ba1e6fc51 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 22 Apr 2009 16:31:35 +0200 Subject: ALSA: hda - Add amp initialization for realtek auto mode In the realtek auto-probing mode, the initialization of amp with some magic COEF or EAPD verbs is applied only when the codec SSID has valid values to satisfy the realtek's definition. However, many devices don't provide in that way, thus the device doesn't work as is. This patch allows the same initialization code even if the SSID doesn't pass the bit test. Also, alc_subsystem_id() is changed just to check and define the type, so that it's called in the parser, instead of the initializer. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 583603f..3a63063 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -253,6 +253,15 @@ enum { /* for GPIO Poll */ #define GPIO_MASK 0x03 +/* extra amp-initialization sequence types */ +enum { + ALC_INIT_NONE, + ALC_INIT_DEFAULT, + ALC_INIT_GPIO1, + ALC_INIT_GPIO2, + ALC_INIT_GPIO3, +}; + struct alc_spec { /* codec parameterization */ struct snd_kcontrol_new *mixers[5]; /* mixer arrays */ @@ -322,6 +331,7 @@ struct alc_spec { /* other flags */ unsigned int no_analog :1; /* digital I/O only */ + int init_amp; /* for virtual master */ hda_nid_t vmaster_nid; @@ -994,74 +1004,21 @@ static void alc888_coef_init(struct hda_codec *codec) AC_VERB_SET_PROC_COEF, 0x3030); } -/* 32-bit subsystem ID for BIOS loading in HD Audio codec. - * 31 ~ 16 : Manufacture ID - * 15 ~ 8 : SKU ID - * 7 ~ 0 : Assembly ID - * port-A --> pin 39/41, port-E --> pin 14/15, port-D --> pin 35/36 - */ -static void alc_subsystem_id(struct hda_codec *codec, - unsigned int porta, unsigned int porte, - unsigned int portd) +static void alc_auto_init_amp(struct hda_codec *codec, int type) { - unsigned int ass, tmp, i; - unsigned nid; - struct alc_spec *spec = codec->spec; - - ass = codec->subsystem_id & 0xffff; - if ((ass != codec->bus->pci->subsystem_device) && (ass & 1)) - goto do_sku; - - /* - * 31~30 : port conetcivity - * 29~21 : reserve - * 20 : PCBEEP input - * 19~16 : Check sum (15:1) - * 15~1 : Custom - * 0 : override - */ - nid = 0x1d; - if (codec->vendor_id == 0x10ec0260) - nid = 0x17; - ass = snd_hda_codec_get_pincfg(codec, nid); - snd_printd("realtek: No valid SSID, " - "checking pincfg 0x%08x for NID 0x%x\n", - ass, nid); - if (!(ass & 1) && !(ass & 0x100000)) - return; - if ((ass >> 30) != 1) /* no physical connection */ - return; + unsigned int tmp; - /* check sum */ - tmp = 0; - for (i = 1; i < 16; i++) { - if ((ass >> i) & 1) - tmp++; - } - if (((ass >> 16) & 0xf) != tmp) - return; -do_sku: - snd_printd("realtek: Enabling init ASM_ID=0x%04x CODEC_ID=%08x\n", - ass & 0xffff, codec->vendor_id); - /* - * 0 : override - * 1 : Swap Jack - * 2 : 0 --> Desktop, 1 --> Laptop - * 3~5 : External Amplifier control - * 7~6 : Reserved - */ - tmp = (ass & 0x38) >> 3; /* external Amp control */ - switch (tmp) { - case 1: + switch (type) { + case ALC_INIT_GPIO1: snd_hda_sequence_write(codec, alc_gpio1_init_verbs); break; - case 3: + case ALC_INIT_GPIO2: snd_hda_sequence_write(codec, alc_gpio2_init_verbs); break; - case 7: + case ALC_INIT_GPIO3: snd_hda_sequence_write(codec, alc_gpio3_init_verbs); break; - case 5: /* set EAPD output high */ + case ALC_INIT_DEFAULT: switch (codec->vendor_id) { case 0x10ec0260: snd_hda_codec_write(codec, 0x0f, 0, @@ -1115,7 +1072,7 @@ do_sku: tmp | 0x2010); break; case 0x10ec0888: - /*alc888_coef_init(codec);*/ /* called in alc_init() */ + alc888_coef_init(codec); break; case 0x10ec0267: case 0x10ec0268: @@ -1130,7 +1087,104 @@ do_sku: tmp | 0x3000); break; } - default: + break; + } +} + +static void alc_init_auto_hp(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + if (!spec->autocfg.hp_pins[0]) + return; + + if (!spec->autocfg.speaker_pins[0]) { + if (spec->autocfg.line_out_pins[0]) + spec->autocfg.speaker_pins[0] = + spec->autocfg.line_out_pins[0]; + else + return; + } + + snd_hda_codec_write_cache(codec, spec->autocfg.hp_pins[0], 0, + AC_VERB_SET_UNSOLICITED_ENABLE, + AC_USRSP_EN | ALC880_HP_EVENT); + spec->unsol_event = alc_sku_unsol_event; +} + +/* check subsystem ID and set up device-specific initialization; + * return 1 if initialized, 0 if invalid SSID + */ +/* 32-bit subsystem ID for BIOS loading in HD Audio codec. + * 31 ~ 16 : Manufacture ID + * 15 ~ 8 : SKU ID + * 7 ~ 0 : Assembly ID + * port-A --> pin 39/41, port-E --> pin 14/15, port-D --> pin 35/36 + */ +static int alc_subsystem_id(struct hda_codec *codec, + hda_nid_t porta, hda_nid_t porte, + hda_nid_t portd) +{ + unsigned int ass, tmp, i; + unsigned nid; + struct alc_spec *spec = codec->spec; + + ass = codec->subsystem_id & 0xffff; + if ((ass != codec->bus->pci->subsystem_device) && (ass & 1)) + goto do_sku; + + /* invalid SSID, check the special NID pin defcfg instead */ + /* + * 31~30 : port conetcivity + * 29~21 : reserve + * 20 : PCBEEP input + * 19~16 : Check sum (15:1) + * 15~1 : Custom + * 0 : override + */ + nid = 0x1d; + if (codec->vendor_id == 0x10ec0260) + nid = 0x17; + ass = snd_hda_codec_get_pincfg(codec, nid); + snd_printd("realtek: No valid SSID, " + "checking pincfg 0x%08x for NID 0x%x\n", + nid, ass); + if (!(ass & 1) && !(ass & 0x100000)) + return 0; + if ((ass >> 30) != 1) /* no physical connection */ + return 0; + + /* check sum */ + tmp = 0; + for (i = 1; i < 16; i++) { + if ((ass >> i) & 1) + tmp++; + } + if (((ass >> 16) & 0xf) != tmp) + return 0; +do_sku: + snd_printd("realtek: Enabling init ASM_ID=0x%04x CODEC_ID=%08x\n", + ass & 0xffff, codec->vendor_id); + /* + * 0 : override + * 1 : Swap Jack + * 2 : 0 --> Desktop, 1 --> Laptop + * 3~5 : External Amplifier control + * 7~6 : Reserved + */ + tmp = (ass & 0x38) >> 3; /* external Amp control */ + switch (tmp) { + case 1: + spec->init_amp = ALC_INIT_GPIO1; + break; + case 3: + spec->init_amp = ALC_INIT_GPIO2; + break; + case 7: + spec->init_amp = ALC_INIT_GPIO3; + break; + case 5: + spec->init_amp = ALC_INIT_DEFAULT; break; } @@ -1138,7 +1192,7 @@ do_sku: * when the external headphone out jack is plugged" */ if (!(ass & 0x8000)) - return; + return 1; /* * 10~8 : Jack location * 12~11: Headphone out -> 00: PortA, 01: PortE, 02: PortD, 03: Resvered @@ -1146,14 +1200,6 @@ do_sku: * 15 : 1 --> enable the function "Mute internal speaker * when the external headphone out jack is plugged" */ - if (!spec->autocfg.speaker_pins[0]) { - if (spec->autocfg.line_out_pins[0]) - spec->autocfg.speaker_pins[0] = - spec->autocfg.line_out_pins[0]; - else - return; - } - if (!spec->autocfg.hp_pins[0]) { tmp = (ass >> 11) & 0x3; /* HP to chassis */ if (tmp == 0) @@ -1163,23 +1209,23 @@ do_sku: else if (tmp == 2) spec->autocfg.hp_pins[0] = portd; else - return; + return 1; } - if (spec->autocfg.hp_pins[0]) - snd_hda_codec_write(codec, spec->autocfg.hp_pins[0], 0, - AC_VERB_SET_UNSOLICITED_ENABLE, - AC_USRSP_EN | ALC880_HP_EVENT); -#if 0 /* it's broken in some acses -- temporarily disabled */ - if (spec->autocfg.input_pins[AUTO_PIN_MIC] && - spec->autocfg.input_pins[AUTO_PIN_FRONT_MIC]) - snd_hda_codec_write(codec, - spec->autocfg.input_pins[AUTO_PIN_MIC], 0, - AC_VERB_SET_UNSOLICITED_ENABLE, - AC_USRSP_EN | ALC880_MIC_EVENT); -#endif /* disabled */ + alc_init_auto_hp(codec); + return 1; +} - spec->unsol_event = alc_sku_unsol_event; +static void alc_ssid_check(struct hda_codec *codec, + hda_nid_t porta, hda_nid_t porte, hda_nid_t portd) +{ + if (!alc_subsystem_id(codec, porta, porte, portd)) { + struct alc_spec *spec = codec->spec; + snd_printd("realtek: " + "Enable default setup for auto mode as fallback\n"); + spec->init_amp = ALC_INIT_DEFAULT; + alc_init_auto_hp(codec); + } } /* @@ -2923,8 +2969,7 @@ static int alc_init(struct hda_codec *codec) unsigned int i; alc_fix_pll(codec); - if (codec->vendor_id == 0x10ec0888) - alc888_coef_init(codec); + alc_auto_init_amp(codec, spec->init_amp); for (i = 0; i < spec->num_init_verbs; i++) snd_hda_sequence_write(codec, spec->init_verbs[i]); @@ -4198,7 +4243,6 @@ static void alc880_auto_init_multi_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; int i; - alc_subsystem_id(codec, 0x15, 0x1b, 0x14); for (i = 0; i < spec->autocfg.line_outs; i++) { hda_nid_t nid = spec->autocfg.line_out_pins[i]; int pin_type = get_pin_type(spec->autocfg.line_out_type); @@ -4303,6 +4347,8 @@ static int alc880_parse_auto_config(struct hda_codec *codec) spec->num_mux_defs = 1; spec->input_mux = &spec->private_imux[0]; + alc_ssid_check(codec, 0x15, 0x1b, 0x14); + return 1; } @@ -5678,7 +5724,6 @@ static void alc260_auto_init_multi_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; hda_nid_t nid; - alc_subsystem_id(codec, 0x10, 0x15, 0x0f); nid = spec->autocfg.line_out_pins[0]; if (nid) { int pin_type = get_pin_type(spec->autocfg.line_out_type); @@ -5788,6 +5833,8 @@ static int alc260_parse_auto_config(struct hda_codec *codec) spec->num_mux_defs = 1; spec->input_mux = &spec->private_imux[0]; + alc_ssid_check(codec, 0x10, 0x15, 0x0f); + return 1; } @@ -7013,7 +7060,6 @@ static void alc882_auto_init_multi_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; int i; - alc_subsystem_id(codec, 0x15, 0x1b, 0x14); for (i = 0; i <= HDA_SIDE; i++) { hda_nid_t nid = spec->autocfg.line_out_pins[i]; int pin_type = get_pin_type(spec->autocfg.line_out_type); @@ -9154,7 +9200,6 @@ static void alc883_auto_init_multi_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; int i; - alc_subsystem_id(codec, 0x15, 0x1b, 0x14); for (i = 0; i <= HDA_SIDE; i++) { hda_nid_t nid = spec->autocfg.line_out_pins[i]; int pin_type = get_pin_type(spec->autocfg.line_out_type); @@ -9317,6 +9362,7 @@ static int patch_alc883(struct hda_codec *codec) if (!spec->capsrc_nids) spec->capsrc_nids = alc883_capsrc_nids; spec->capture_style = CAPT_MIX; /* matrix-style capture */ + spec->init_amp = ALC_INIT_DEFAULT; /* always initialize */ break; case 0x10ec0889: spec->stream_name_analog = "ALC889 Analog"; @@ -10842,6 +10888,8 @@ static int alc262_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; + alc_ssid_check(codec, 0x15, 0x14, 0x1b); + return 1; } @@ -13925,7 +13973,6 @@ static void alc861_auto_init_multi_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; int i; - alc_subsystem_id(codec, 0x0e, 0x0f, 0x0b); for (i = 0; i < spec->autocfg.line_outs; i++) { hda_nid_t nid = spec->autocfg.line_out_pins[i]; int pin_type = get_pin_type(spec->autocfg.line_out_type); @@ -14008,6 +14055,8 @@ static int alc861_parse_auto_config(struct hda_codec *codec) spec->num_adc_nids = ARRAY_SIZE(alc861_adc_nids); set_capture_mixer(spec); + alc_ssid_check(codec, 0x0e, 0x0f, 0x0b); + return 1; } @@ -14889,7 +14938,6 @@ static void alc861vd_auto_init_multi_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; int i; - alc_subsystem_id(codec, 0x15, 0x1b, 0x14); for (i = 0; i <= HDA_SIDE; i++) { hda_nid_t nid = spec->autocfg.line_out_pins[i]; int pin_type = get_pin_type(spec->autocfg.line_out_type); @@ -15107,6 +15155,8 @@ static int alc861vd_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; + alc_ssid_check(codec, 0x15, 0x1b, 0x14); + return 1; } @@ -16931,7 +16981,6 @@ static void alc662_auto_init_multi_out(struct hda_codec *codec) struct alc_spec *spec = codec->spec; int i; - alc_subsystem_id(codec, 0x15, 0x1b, 0x14); for (i = 0; i <= HDA_SIDE; i++) { hda_nid_t nid = spec->autocfg.line_out_pins[i]; int pin_type = get_pin_type(spec->autocfg.line_out_type); @@ -17028,6 +17077,8 @@ static int alc662_parse_auto_config(struct hda_codec *codec) if (err < 0) return err; + alc_ssid_check(codec, 0x15, 0x1b, 0x14); + return 1; } -- cgit v0.10.2 From 4bc4d8998a472cd64aa66a4abad3d833be901028 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Apr 2009 14:28:44 +0100 Subject: ASoC: Enforce symmetric rates for S3C64xx I2S interface There is only one LRCLK pin on each interface. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c index a84c4be..c335248 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.c +++ b/sound/soc/s3c24xx/s3c64xx-i2s.c @@ -171,6 +171,7 @@ struct snd_soc_dai s3c64xx_i2s_dai[] = { .formats = S3C64XX_I2S_FMTS, }, .ops = &s3c64xx_i2s_dai_ops, + .symmetric_rates = 1, }, { .name = "s3c64xx-i2s", @@ -189,6 +190,7 @@ struct snd_soc_dai s3c64xx_i2s_dai[] = { .formats = S3C64XX_I2S_FMTS, }, .ops = &s3c64xx_i2s_dai_ops, + .symmetric_rates = 1, }, }; EXPORT_SYMBOL_GPL(s3c64xx_i2s_dai); -- cgit v0.10.2 From 008db442efa542357314593c71ab9966be909855 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Apr 2009 19:17:08 +0100 Subject: ASoC: Include WM8350 register definitions in CODEC header It's expected behaviour for the CODEC header to provide them but the WM8350 doesn't due to having all the registers together under drivers/mfd. Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8350.h b/sound/soc/codecs/wm8350.h index d11bd92..d088eb4 100644 --- a/sound/soc/codecs/wm8350.h +++ b/sound/soc/codecs/wm8350.h @@ -13,6 +13,7 @@ #define _WM8350_H #include +#include extern struct snd_soc_dai wm8350_dai; extern struct snd_soc_codec_device soc_codec_dev_wm8350; -- cgit v0.10.2 From 5c556a6e190897a0f1ff14e13722591828412031 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Apr 2009 20:23:19 +0100 Subject: ASoC: s3c-i2s-v2 diagnostic improvements Say what invalid values we're seeing when we see an invalid value and ensure that errors are displayed by default. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index ab680aa..aeea49c 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -105,7 +105,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "TXEN: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "TXEN: Invalid MODE %x in IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); + break; } writel(con, regs + S3C2412_IISCON); @@ -132,7 +134,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "TXDIS: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "TXDIS: Invalid MODE %xin IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); + break; } writel(mod, regs + S3C2412_IISMOD); @@ -175,7 +179,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); } writel(mod, regs + S3C2412_IISMOD); @@ -199,7 +204,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); } writel(con, regs + S3C2412_IISCON); @@ -281,7 +287,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, iismod |= IISMOD_MASTER; break; default: - pr_debug("unknwon master/slave format\n"); + pr_err("unknwon master/slave format\n"); return -EINVAL; } @@ -298,7 +304,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, iismod |= S3C2412_IISMOD_SDF_IIS; break; default: - pr_debug("Unknown data format\n"); + pr_err("Unknown data format\n"); return -EINVAL; } -- cgit v0.10.2 From a7be4d92d989fc53d840d24cba2ebea9e5ad8480 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Apr 2009 20:24:15 +0100 Subject: ASoC: Use our registration function for S3C64xx Make sure we get the DAI operations initialised. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index aeea49c..ab680aa 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -105,9 +105,7 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "TXEN: Invalid MODE %x in IISMOD\n", - mod & S3C2412_IISMOD_MODE_MASK); - break; + dev_err(i2s->dev, "TXEN: Invalid MODE in IISMOD\n"); } writel(con, regs + S3C2412_IISCON); @@ -134,9 +132,7 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "TXDIS: Invalid MODE %xin IISMOD\n", - mod & S3C2412_IISMOD_MODE_MASK); - break; + dev_err(i2s->dev, "TXDIS: Invalid MODE in IISMOD\n"); } writel(mod, regs + S3C2412_IISMOD); @@ -179,8 +175,7 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n", - mod & S3C2412_IISMOD_MODE_MASK); + dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n"); } writel(mod, regs + S3C2412_IISMOD); @@ -204,8 +199,7 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n", - mod & S3C2412_IISMOD_MODE_MASK); + dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n"); } writel(con, regs + S3C2412_IISCON); @@ -287,7 +281,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, iismod |= IISMOD_MASTER; break; default: - pr_err("unknwon master/slave format\n"); + pr_debug("unknwon master/slave format\n"); return -EINVAL; } @@ -304,7 +298,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, iismod |= S3C2412_IISMOD_SDF_IIS; break; default: - pr_err("Unknown data format\n"); + pr_debug("Unknown data format\n"); return -EINVAL; } diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c index c335248..1345fbd 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.c +++ b/sound/soc/s3c24xx/s3c64xx-i2s.c @@ -225,7 +225,7 @@ static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev) if (ret) goto err_clk; - ret = snd_soc_register_dai(dai); + ret = s3c_i2sv2_register_dai(dai); if (ret != 0) goto err_i2sv2; -- cgit v0.10.2 From 0b5e92c5e020ee7437fa5304a8451d6dd08d1a26 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Mon, 27 Apr 2009 13:49:44 +0000 Subject: ASoC WM8940 Driver Signed-off-by: Jonathan Cameron Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 121d63f..1c19ad5 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -35,6 +35,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_WM8753 if SND_SOC_I2C_AND_SPI select SND_SOC_WM8900 if I2C select SND_SOC_WM8903 if I2C + select SND_SOC_WM8940 if I2C select SND_SOC_WM8960 if I2C select SND_SOC_WM8971 if I2C select SND_SOC_WM8988 if SND_SOC_I2C_AND_SPI @@ -140,6 +141,9 @@ config SND_SOC_WM8900 config SND_SOC_WM8903 tristate +config SND_SOC_WM8940 + tristate + config SND_SOC_WM8960 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 8116968..3d31b6b 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -23,6 +23,7 @@ snd-soc-wm8750-objs := wm8750.o snd-soc-wm8753-objs := wm8753.o snd-soc-wm8900-objs := wm8900.o snd-soc-wm8903-objs := wm8903.o +snd-soc-wm8940-objs := wm8940.o snd-soc-wm8960-objs := wm8960.o snd-soc-wm8971-objs := wm8971.o snd-soc-wm8988-objs := wm8988.o @@ -57,6 +58,7 @@ obj-$(CONFIG_SND_SOC_WM8753) += snd-soc-wm8753.o obj-$(CONFIG_SND_SOC_WM8900) += snd-soc-wm8900.o obj-$(CONFIG_SND_SOC_WM8903) += snd-soc-wm8903.o obj-$(CONFIG_SND_SOC_WM8971) += snd-soc-wm8971.o +obj-$(CONFIG_SND_SOC_WM8940) += snd-soc-wm8940.o obj-$(CONFIG_SND_SOC_WM8960) += snd-soc-wm8960.o obj-$(CONFIG_SND_SOC_WM8988) += snd-soc-wm8988.o obj-$(CONFIG_SND_SOC_WM8990) += snd-soc-wm8990.o diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c new file mode 100644 index 0000000..26987dc --- /dev/null +++ b/sound/soc/codecs/wm8940.c @@ -0,0 +1,955 @@ +/* + * wm8940.c -- WM8940 ALSA Soc Audio driver + * + * Author: Jonathan Cameron + * + * Based on wm8510.c + * Copyright 2006 Wolfson Microelectronics PLC. + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Not currently handled: + * Notch filter control + * AUXMode (inverting vs mixer) + * No means to obtain current gain if alc enabled. + * No use made of gpio + * Fast VMID discharge for power down + * Soft Start + * DLR and ALR Swaps not enabled + * Digital Sidetone not supported + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "wm8940.h" + +struct wm8940_priv { + unsigned int sysclk; + u16 reg_cache[WM8940_CACHEREGNUM]; + struct snd_soc_codec codec; +}; + +static u16 wm8940_reg_defaults[] = { + 0x8940, /* Soft Reset */ + 0x0000, /* Power 1 */ + 0x0000, /* Power 2 */ + 0x0000, /* Power 3 */ + 0x0010, /* Interface Control */ + 0x0000, /* Companding Control */ + 0x0140, /* Clock Control */ + 0x0000, /* Additional Controls */ + 0x0000, /* GPIO Control */ + 0x0002, /* Auto Increment Control */ + 0x0000, /* DAC Control */ + 0x00FF, /* DAC Volume */ + 0, + 0, + 0x0100, /* ADC Control */ + 0x00FF, /* ADC Volume */ + 0x0000, /* Notch Filter 1 Control 1 */ + 0x0000, /* Notch Filter 1 Control 2 */ + 0x0000, /* Notch Filter 2 Control 1 */ + 0x0000, /* Notch Filter 2 Control 2 */ + 0x0000, /* Notch Filter 3 Control 1 */ + 0x0000, /* Notch Filter 3 Control 2 */ + 0x0000, /* Notch Filter 4 Control 1 */ + 0x0000, /* Notch Filter 4 Control 2 */ + 0x0032, /* DAC Limit Control 1 */ + 0x0000, /* DAC Limit Control 2 */ + 0, + 0, + 0, + 0, + 0, + 0, + 0x0038, /* ALC Control 1 */ + 0x000B, /* ALC Control 2 */ + 0x0032, /* ALC Control 3 */ + 0x0000, /* Noise Gate */ + 0x0041, /* PLLN */ + 0x000C, /* PLLK1 */ + 0x0093, /* PLLK2 */ + 0x00E9, /* PLLK3 */ + 0, + 0, + 0x0030, /* ALC Control 4 */ + 0, + 0x0002, /* Input Control */ + 0x0050, /* PGA Gain */ + 0, + 0x0002, /* ADC Boost Control */ + 0, + 0x0002, /* Output Control */ + 0x0000, /* Speaker Mixer Control */ + 0, + 0, + 0, + 0x0079, /* Speaker Volume */ + 0, + 0x0000, /* Mono Mixer Control */ +}; + +static inline unsigned int wm8940_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + u16 *cache = codec->reg_cache; + + if (reg >= ARRAY_SIZE(wm8940_reg_defaults)) + return -1; + + return cache[reg]; +} + +static inline int wm8940_write_reg_cache(struct snd_soc_codec *codec, + u16 reg, unsigned int value) +{ + u16 *cache = codec->reg_cache; + + if (reg >= ARRAY_SIZE(wm8940_reg_defaults)) + return -1; + + cache[reg] = value; + + return 0; +} + +static int wm8940_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + int ret; + u8 data[3] = { reg, + (value & 0xff00) >> 8, + (value & 0x00ff) + }; + + wm8940_write_reg_cache(codec, reg, value); + + ret = codec->hw_write(codec->control_data, data, 3); + + if (ret < 0) + return ret; + else if (ret != 3) + return -EIO; + return 0; +} + +static const char *wm8940_companding[] = { "Off", "NC", "u-law", "A-law" }; +static const struct soc_enum wm8940_adc_companding_enum += SOC_ENUM_SINGLE(WM8940_COMPANDINGCTL, 1, 4, wm8940_companding); +static const struct soc_enum wm8940_dac_companding_enum += SOC_ENUM_SINGLE(WM8940_COMPANDINGCTL, 3, 4, wm8940_companding); + +static const char *wm8940_alc_mode_text[] = {"ALC", "Limiter"}; +static const struct soc_enum wm8940_alc_mode_enum += SOC_ENUM_SINGLE(WM8940_ALC3, 8, 2, wm8940_alc_mode_text); + +static const char *wm8940_mic_bias_level_text[] = {"0.9", "0.65"}; +static const struct soc_enum wm8940_mic_bias_level_enum += SOC_ENUM_SINGLE(WM8940_INPUTCTL, 8, 2, wm8940_mic_bias_level_text); + +static const char *wm8940_filter_mode_text[] = {"Audio", "Application"}; +static const struct soc_enum wm8940_filter_mode_enum += SOC_ENUM_SINGLE(WM8940_ADC, 7, 2, wm8940_filter_mode_text); + +DECLARE_TLV_DB_SCALE(wm8940_spk_vol_tlv, -5700, 100, 1); +DECLARE_TLV_DB_SCALE(wm8940_att_tlv, -1000, 1000, 0); +DECLARE_TLV_DB_SCALE(wm8940_pga_vol_tlv, -1200, 75, 0); +DECLARE_TLV_DB_SCALE(wm8940_alc_min_tlv, -1200, 600, 0); +DECLARE_TLV_DB_SCALE(wm8940_alc_max_tlv, 675, 600, 0); +DECLARE_TLV_DB_SCALE(wm8940_alc_tar_tlv, -2250, 50, 0); +DECLARE_TLV_DB_SCALE(wm8940_lim_boost_tlv, 0, 100, 0); +DECLARE_TLV_DB_SCALE(wm8940_lim_thresh_tlv, -600, 100, 0); +DECLARE_TLV_DB_SCALE(wm8940_adc_tlv, -12750, 50, 1); +DECLARE_TLV_DB_SCALE(wm8940_capture_boost_vol_tlv, 0, 2000, 0); + +static const struct snd_kcontrol_new wm8940_snd_controls[] = { + SOC_SINGLE("Digital Loopback Switch", WM8940_COMPANDINGCTL, + 6, 1, 0), + SOC_ENUM("DAC Companding", wm8940_dac_companding_enum), + SOC_ENUM("ADC Companding", wm8940_adc_companding_enum), + + SOC_ENUM("ALC Mode", wm8940_alc_mode_enum), + SOC_SINGLE("ALC Switch", WM8940_ALC1, 8, 1, 0), + SOC_SINGLE_TLV("ALC Capture Max Gain", WM8940_ALC1, + 3, 7, 1, wm8940_alc_max_tlv), + SOC_SINGLE_TLV("ALC Capture Min Gain", WM8940_ALC1, + 0, 7, 0, wm8940_alc_min_tlv), + SOC_SINGLE_TLV("ALC Capture Target", WM8940_ALC2, + 0, 14, 0, wm8940_alc_tar_tlv), + SOC_SINGLE("ALC Capture Hold", WM8940_ALC2, 4, 10, 0), + SOC_SINGLE("ALC Capture Decay", WM8940_ALC3, 4, 10, 0), + SOC_SINGLE("ALC Capture Attach", WM8940_ALC3, 0, 10, 0), + SOC_SINGLE("ALC ZC Switch", WM8940_ALC4, 1, 1, 0), + SOC_SINGLE("ALC Capture Noise Gate Switch", WM8940_NOISEGATE, + 3, 1, 0), + SOC_SINGLE("ALC Capture Noise Gate Threshold", WM8940_NOISEGATE, + 0, 7, 0), + + SOC_SINGLE("DAC Playback Limiter Switch", WM8940_DACLIM1, 8, 1, 0), + SOC_SINGLE("DAC Playback Limiter Attack", WM8940_DACLIM1, 0, 9, 0), + SOC_SINGLE("DAC Playback Limiter Decay", WM8940_DACLIM1, 4, 11, 0), + SOC_SINGLE_TLV("DAC Playback Limiter Threshold", WM8940_DACLIM2, + 4, 9, 1, wm8940_lim_thresh_tlv), + SOC_SINGLE_TLV("DAC Playback Limiter Boost", WM8940_DACLIM2, + 0, 12, 0, wm8940_lim_boost_tlv), + + SOC_SINGLE("Capture PGA ZC Switch", WM8940_PGAGAIN, 7, 1, 0), + SOC_SINGLE_TLV("Capture PGA Volume", WM8940_PGAGAIN, + 0, 63, 0, wm8940_pga_vol_tlv), + SOC_SINGLE_TLV("Digital Playback Volume", WM8940_DACVOL, + 0, 255, 0, wm8940_adc_tlv), + SOC_SINGLE_TLV("Digital Capture Volume", WM8940_ADCVOL, + 0, 255, 0, wm8940_adc_tlv), + SOC_ENUM("Mic Bias Level", wm8940_mic_bias_level_enum), + SOC_SINGLE_TLV("Capture Boost Volue", WM8940_ADCBOOST, + 8, 1, 0, wm8940_capture_boost_vol_tlv), + SOC_SINGLE_TLV("Speaker Playback Volume", WM8940_SPKVOL, + 0, 63, 0, wm8940_spk_vol_tlv), + SOC_SINGLE("Speaker Playback Switch", WM8940_SPKVOL, 6, 1, 1), + + SOC_SINGLE_TLV("Speaker Mixer Line Bypass Volume", WM8940_SPKVOL, + 8, 1, 1, wm8940_att_tlv), + SOC_SINGLE("Speaker Playback ZC Switch", WM8940_SPKVOL, 7, 1, 0), + + SOC_SINGLE("Mono Out Switch", WM8940_MONOMIX, 6, 1, 1), + SOC_SINGLE_TLV("Mono Mixer Line Bypass Volume", WM8940_MONOMIX, + 7, 1, 1, wm8940_att_tlv), + + SOC_SINGLE("High Pass Filter Switch", WM8940_ADC, 8, 1, 0), + SOC_ENUM("High Pass Filter Mode", wm8940_filter_mode_enum), + SOC_SINGLE("High Pass Filter Cut Off", WM8940_ADC, 4, 7, 0), + SOC_SINGLE("ADC Inversion Switch", WM8940_ADC, 0, 1, 0), + SOC_SINGLE("DAC Inversion Switch", WM8940_DAC, 0, 1, 0), + SOC_SINGLE("DAC Auto Mute Switch", WM8940_DAC, 2, 1, 0), + SOC_SINGLE("ZC Timeout Clock Switch", WM8940_ADDCNTRL, 0, 1, 0), +}; + +static const struct snd_kcontrol_new wm8940_speaker_mixer_controls[] = { + SOC_DAPM_SINGLE("Line Bypass Switch", WM8940_SPKMIX, 1, 1, 0), + SOC_DAPM_SINGLE("Aux Playback Switch", WM8940_SPKMIX, 5, 1, 0), + SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_SPKMIX, 0, 1, 0), +}; + +static const struct snd_kcontrol_new wm8940_mono_mixer_controls[] = { + SOC_DAPM_SINGLE("Line Bypass Switch", WM8940_MONOMIX, 1, 1, 0), + SOC_DAPM_SINGLE("Aux Playback Switch", WM8940_MONOMIX, 2, 1, 0), + SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_MONOMIX, 0, 1, 0), +}; + +DECLARE_TLV_DB_SCALE(wm8940_boost_vol_tlv, -1500, 300, 1); +static const struct snd_kcontrol_new wm8940_input_boost_controls[] = { + SOC_DAPM_SINGLE("Mic PGA Switch", WM8940_PGAGAIN, 6, 1, 1), + SOC_DAPM_SINGLE_TLV("Aux Volume", WM8940_ADCBOOST, + 0, 7, 0, wm8940_boost_vol_tlv), + SOC_DAPM_SINGLE_TLV("Mic Volume", WM8940_ADCBOOST, + 4, 7, 0, wm8940_boost_vol_tlv), +}; + +static const struct snd_kcontrol_new wm8940_micpga_controls[] = { + SOC_DAPM_SINGLE("AUX Switch", WM8940_INPUTCTL, 2, 1, 0), + SOC_DAPM_SINGLE("MICP Switch", WM8940_INPUTCTL, 0, 1, 0), + SOC_DAPM_SINGLE("MICN Switch", WM8940_INPUTCTL, 1, 1, 0), +}; + +static const struct snd_soc_dapm_widget wm8940_dapm_widgets[] = { + SND_SOC_DAPM_MIXER("Speaker Mixer", WM8940_POWER3, 2, 0, + &wm8940_speaker_mixer_controls[0], + ARRAY_SIZE(wm8940_speaker_mixer_controls)), + SND_SOC_DAPM_MIXER("Mono Mixer", WM8940_POWER3, 3, 0, + &wm8940_mono_mixer_controls[0], + ARRAY_SIZE(wm8940_mono_mixer_controls)), + SND_SOC_DAPM_DAC("DAC", "HiFi Playback", WM8940_POWER3, 0, 0), + + SND_SOC_DAPM_PGA("SpkN Out", WM8940_POWER3, 5, 0, NULL, 0), + SND_SOC_DAPM_PGA("SpkP Out", WM8940_POWER3, 6, 0, NULL, 0), + SND_SOC_DAPM_PGA("Mono Out", WM8940_POWER3, 7, 0, NULL, 0), + SND_SOC_DAPM_OUTPUT("MONOOUT"), + SND_SOC_DAPM_OUTPUT("SPKOUTP"), + SND_SOC_DAPM_OUTPUT("SPKOUTN"), + + SND_SOC_DAPM_PGA("Aux Input", WM8940_POWER1, 6, 0, NULL, 0), + SND_SOC_DAPM_ADC("ADC", "HiFi Capture", WM8940_POWER2, 0, 0), + SND_SOC_DAPM_MIXER("Mic PGA", WM8940_POWER2, 2, 0, + &wm8940_micpga_controls[0], + ARRAY_SIZE(wm8940_micpga_controls)), + SND_SOC_DAPM_MIXER("Boost Mixer", WM8940_POWER2, 4, 0, + &wm8940_input_boost_controls[0], + ARRAY_SIZE(wm8940_input_boost_controls)), + SND_SOC_DAPM_MICBIAS("Mic Bias", WM8940_POWER1, 4, 0), + + SND_SOC_DAPM_INPUT("MICN"), + SND_SOC_DAPM_INPUT("MICP"), + SND_SOC_DAPM_INPUT("AUX"), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + /* Mono output mixer */ + {"Mono Mixer", "PCM Playback Switch", "DAC"}, + {"Mono Mixer", "Aux Playback Switch", "Aux Input"}, + {"Mono Mixer", "Line Bypass Switch", "Boost Mixer"}, + + /* Speaker output mixer */ + {"Speaker Mixer", "PCM Playback Switch", "DAC"}, + {"Speaker Mixer", "Aux Playback Switch", "Aux Input"}, + {"Speaker Mixer", "Line Bypass Switch", "Boost Mixer"}, + + /* Outputs */ + {"Mono Out", NULL, "Mono Mixer"}, + {"MONOOUT", NULL, "Mono Out"}, + {"SpkN Out", NULL, "Speaker Mixer"}, + {"SpkP Out", NULL, "Speaker Mixer"}, + {"SPKOUTN", NULL, "SpkN Out"}, + {"SPKOUTP", NULL, "SpkP Out"}, + + /* Microphone PGA */ + {"Mic PGA", "MICN Switch", "MICN"}, + {"Mic PGA", "MICP Switch", "MICP"}, + {"Mic PGA", "AUX Switch", "AUX"}, + + /* Boost Mixer */ + {"Boost Mixer", "Mic PGA Switch", "Mic PGA"}, + {"Boost Mixer", "Mic Volume", "MICP"}, + {"Boost Mixer", "Aux Volume", "Aux Input"}, + + {"ADC", NULL, "Boost Mixer"}, +}; + +static int wm8940_add_widgets(struct snd_soc_codec *codec) +{ + int ret; + + ret = snd_soc_dapm_new_controls(codec, wm8940_dapm_widgets, + ARRAY_SIZE(wm8940_dapm_widgets)); + if (ret) + goto error_ret; + ret = snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); + if (ret) + goto error_ret; + ret = snd_soc_dapm_new_widgets(codec); + +error_ret: + return ret; +} + +#define wm8940_reset(c) wm8940_write(c, WM8940_SOFTRESET, 0); + +static int wm8940_set_dai_fmt(struct snd_soc_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 iface = wm8940_read_reg_cache(codec, WM8940_IFACE) & 0xFE67; + u16 clk = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0x1fe; + + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + clk |= 1; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } + wm8940_write(codec, WM8940_CLOCK, clk); + + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + iface |= (2 << 3); + break; + case SND_SOC_DAIFMT_LEFT_J: + iface |= (1 << 3); + break; + case SND_SOC_DAIFMT_RIGHT_J: + break; + case SND_SOC_DAIFMT_DSP_A: + iface |= (3 << 3); + break; + case SND_SOC_DAIFMT_DSP_B: + iface |= (3 << 3) | (1 << 7); + break; + } + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_NB_IF: + iface |= (1 << 7); + break; + case SND_SOC_DAIFMT_IB_NF: + iface |= (1 << 8); + break; + case SND_SOC_DAIFMT_IB_IF: + iface |= (1 << 8) | (1 << 7); + break; + } + + wm8940_write(codec, WM8940_IFACE, iface); + + return 0; +} + +static int wm8940_i2s_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->card->codec; + u16 iface = wm8940_read_reg_cache(codec, WM8940_IFACE) & 0xFD9F; + u16 addcntrl = wm8940_read_reg_cache(codec, WM8940_ADDCNTRL) & 0xFFF1; + u16 companding = wm8940_read_reg_cache(codec, + WM8940_COMPANDINGCTL) & 0xFFDF; + int ret; + + /* LoutR control */ + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE + && params_channels(params) == 2) + iface |= (1 << 9); + + switch (params_rate(params)) { + case SNDRV_PCM_RATE_8000: + addcntrl |= (0x5 << 1); + break; + case SNDRV_PCM_RATE_11025: + addcntrl |= (0x4 << 1); + break; + case SNDRV_PCM_RATE_16000: + addcntrl |= (0x3 << 1); + break; + case SNDRV_PCM_RATE_22050: + addcntrl |= (0x2 << 1); + break; + case SNDRV_PCM_RATE_32000: + addcntrl |= (0x1 << 1); + break; + case SNDRV_PCM_RATE_44100: + case SNDRV_PCM_RATE_48000: + break; + } + ret = wm8940_write(codec, WM8940_ADDCNTRL, addcntrl); + if (ret) + goto error_ret; + + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S8: + companding = companding | (1 << 5); + break; + case SNDRV_PCM_FORMAT_S16_LE: + break; + case SNDRV_PCM_FORMAT_S20_3LE: + iface |= (1 << 5); + break; + case SNDRV_PCM_FORMAT_S24_LE: + iface |= (2 << 5); + break; + case SNDRV_PCM_FORMAT_S32_LE: + iface |= (3 << 5); + break; + } + ret = wm8940_write(codec, WM8940_COMPANDINGCTL, companding); + if (ret) + goto error_ret; + ret = wm8940_write(codec, WM8940_IFACE, iface); + +error_ret: + return ret; +} + +static int wm8940_mute(struct snd_soc_dai *dai, int mute) +{ + struct snd_soc_codec *codec = dai->codec; + u16 mute_reg = wm8940_read_reg_cache(codec, WM8940_DAC) & 0xffbf; + + if (mute) + mute_reg |= 0x40; + + return wm8940_write(codec, WM8940_DAC, mute_reg); +} + +static int wm8940_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + u16 val; + u16 pwr_reg = wm8940_read_reg_cache(codec, WM8940_POWER1) & 0x1F0; + int ret = 0; + + switch (level) { + case SND_SOC_BIAS_ON: + /* ensure bufioen and biasen */ + pwr_reg |= (1 << 2) | (1 << 3); + /* Enable thermal shutdown */ + val = wm8940_read_reg_cache(codec, WM8940_OUTPUTCTL); + ret = wm8940_write(codec, WM8940_OUTPUTCTL, val | 0x2); + if (ret) + break; + /* set vmid to 75k */ + ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x1); + break; + case SND_SOC_BIAS_PREPARE: + /* ensure bufioen and biasen */ + pwr_reg |= (1 << 2) | (1 << 3); + ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x1); + break; + case SND_SOC_BIAS_STANDBY: + /* ensure bufioen and biasen */ + pwr_reg |= (1 << 2) | (1 << 3); + /* set vmid to 300k for standby */ + ret = wm8940_write(codec, WM8940_POWER1, pwr_reg | 0x2); + break; + case SND_SOC_BIAS_OFF: + ret = wm8940_write(codec, WM8940_POWER1, pwr_reg); + break; + } + + return ret; +} + +struct pll_ { + unsigned int pre_scale:2; + unsigned int n:4; + unsigned int k; +}; + +static struct pll_ pll_div; + +/* The size in bits of the pll divide multiplied by 10 + * to allow rounding later */ +#define FIXED_PLL_SIZE ((1 << 24) * 10) +static void pll_factors(unsigned int target, unsigned int source) +{ + unsigned long long Kpart; + unsigned int K, Ndiv, Nmod; + /* The left shift ist to avoid accuracy loss when right shifting */ + Ndiv = target / source; + + if (Ndiv > 12) { + source <<= 1; + /* Multiply by 2 */ + pll_div.pre_scale = 0; + Ndiv = target / source; + } else if (Ndiv < 3) { + source >>= 2; + /* Divide by 4 */ + pll_div.pre_scale = 3; + Ndiv = target / source; + } else if (Ndiv < 6) { + source >>= 1; + /* divide by 2 */ + pll_div.pre_scale = 2; + Ndiv = target / source; + } else + pll_div.pre_scale = 1; + + if ((Ndiv < 6) || (Ndiv > 12)) + printk(KERN_WARNING + "WM8940 N value %d outwith recommended range!d\n", + Ndiv); + + pll_div.n = Ndiv; + Nmod = target % source; + Kpart = FIXED_PLL_SIZE * (long long)Nmod; + + do_div(Kpart, source); + + K = Kpart & 0xFFFFFFFF; + + /* Check if we need to round */ + if ((K % 10) >= 5) + K += 5; + + /* Move down to proper range now rounding is done */ + K /= 10; + + pll_div.k = K; +} + +/* Untested at the moment */ +static int wm8940_set_dai_pll(struct snd_soc_dai *codec_dai, + int pll_id, unsigned int freq_in, unsigned int freq_out) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 reg; + + /* Turn off PLL */ + reg = wm8940_read_reg_cache(codec, WM8940_POWER1); + wm8940_write(codec, WM8940_POWER1, reg & 0x1df); + + if (freq_in == 0 || freq_out == 0) { + /* Clock CODEC directly from MCLK */ + reg = wm8940_read_reg_cache(codec, WM8940_CLOCK); + wm8940_write(codec, WM8940_CLOCK, reg & 0x0ff); + /* Pll power down */ + wm8940_write(codec, WM8940_PLLN, (1 << 7)); + return 0; + } + + /* Pll is followed by a frequency divide by 4 */ + pll_factors(freq_out*4, freq_in); + if (pll_div.k) + wm8940_write(codec, WM8940_PLLN, + (pll_div.pre_scale << 4) | pll_div.n | (1 << 6)); + else /* No factional component */ + wm8940_write(codec, WM8940_PLLN, + (pll_div.pre_scale << 4) | pll_div.n); + wm8940_write(codec, WM8940_PLLK1, pll_div.k >> 18); + wm8940_write(codec, WM8940_PLLK2, (pll_div.k >> 9) & 0x1ff); + wm8940_write(codec, WM8940_PLLK3, pll_div.k & 0x1ff); + /* Enable the PLL */ + reg = wm8940_read_reg_cache(codec, WM8940_POWER1); + wm8940_write(codec, WM8940_POWER1, reg | 0x020); + + /* Run CODEC from PLL instead of MCLK */ + reg = wm8940_read_reg_cache(codec, WM8940_CLOCK); + wm8940_write(codec, WM8940_CLOCK, reg | 0x100); + + return 0; +} + +static int wm8940_set_dai_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct wm8940_priv *wm8940 = codec->private_data; + + switch (freq) { + case 11289600: + case 12000000: + case 12288000: + case 16934400: + case 18432000: + wm8940->sysclk = freq; + return 0; + } + return -EINVAL; +} + +static int wm8940_set_dai_clkdiv(struct snd_soc_dai *codec_dai, + int div_id, int div) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 reg; + int ret = 0; + + switch (div_id) { + case WM8940_BCLKDIV: + reg = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0xFFEF3; + ret = wm8940_write(codec, WM8940_CLOCK, reg | (div << 2)); + break; + case WM8940_MCLKDIV: + reg = wm8940_read_reg_cache(codec, WM8940_CLOCK) & 0xFF1F; + ret = wm8940_write(codec, WM8940_CLOCK, reg | (div << 5)); + break; + case WM8940_OPCLKDIV: + reg = wm8940_read_reg_cache(codec, WM8940_ADDCNTRL) & 0xFFCF; + ret = wm8940_write(codec, WM8940_ADDCNTRL, reg | (div << 4)); + break; + } + return ret; +} + +#define WM8940_RATES SNDRV_PCM_RATE_8000_48000 + +#define WM8940_FORMATS (SNDRV_PCM_FMTBIT_S8 | \ + SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S20_3LE | \ + SNDRV_PCM_FMTBIT_S24_LE | \ + SNDRV_PCM_FMTBIT_S32_LE) + +static struct snd_soc_dai_ops wm8940_dai_ops = { + .hw_params = wm8940_i2s_hw_params, + .set_sysclk = wm8940_set_dai_sysclk, + .digital_mute = wm8940_mute, + .set_fmt = wm8940_set_dai_fmt, + .set_clkdiv = wm8940_set_dai_clkdiv, + .set_pll = wm8940_set_dai_pll, +}; + +struct snd_soc_dai wm8940_dai = { + .name = "WM8940", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = WM8940_RATES, + .formats = WM8940_FORMATS, + }, + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = WM8940_RATES, + .formats = WM8940_FORMATS, + }, + .ops = &wm8940_dai_ops, + .symmetric_rates = 1, +}; +EXPORT_SYMBOL_GPL(wm8940_dai); + +static int wm8940_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + + return wm8940_set_bias_level(codec, SND_SOC_BIAS_OFF); +} + +static int wm8940_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->card->codec; + int i; + int ret; + u8 data[3]; + u16 *cache = codec->reg_cache; + + /* Sync reg_cache with the hardware + * Could use auto incremented writes to speed this up + */ + for (i = 0; i < ARRAY_SIZE(wm8940_reg_defaults); i++) { + data[0] = i; + data[1] = (cache[i] & 0xFF00) >> 8; + data[2] = cache[i] & 0x00FF; + ret = codec->hw_write(codec->control_data, data, 3); + if (ret < 0) + goto error_ret; + else if (ret != 3) { + ret = -EIO; + goto error_ret; + } + } + ret = wm8940_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + if (ret) + goto error_ret; + ret = wm8940_set_bias_level(codec, codec->suspend_bias_level); + +error_ret: + return ret; +} + +static struct snd_soc_codec *wm8940_codec; + +static int wm8940_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec; + + int ret = 0; + + if (wm8940_codec == NULL) { + dev_err(&pdev->dev, "Codec device not registered\n"); + return -ENODEV; + } + + socdev->card->codec = wm8940_codec; + codec = wm8940_codec; + + mutex_init(&codec->mutex); + /* register pcms */ + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + dev_err(codec->dev, "failed to create pcms: %d\n", ret); + goto pcm_err; + } + + ret = snd_soc_add_controls(codec, wm8940_snd_controls, + ARRAY_SIZE(wm8940_snd_controls)); + if (ret) + goto error_free_pcms; + ret = wm8940_add_widgets(codec); + if (ret) + goto error_free_pcms; + + ret = snd_soc_init_card(socdev); + if (ret < 0) { + dev_err(codec->dev, "failed to register card: %d\n", ret); + goto error_free_pcms; + } + + return ret; + +error_free_pcms: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +pcm_err: + return ret; +} + +static int wm8940_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); + + return 0; +} + +struct snd_soc_codec_device soc_codec_dev_wm8940 = { + .probe = wm8940_probe, + .remove = wm8940_remove, + .suspend = wm8940_suspend, + .resume = wm8940_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_wm8940); + +static int wm8940_register(struct wm8940_priv *wm8940) +{ + struct wm8940_setup_data *pdata = wm8940->codec.dev->platform_data; + struct snd_soc_codec *codec = &wm8940->codec; + int ret; + u16 reg; + if (wm8940_codec) { + dev_err(codec->dev, "Another WM8940 is registered\n"); + return -EINVAL; + } + + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + + codec->private_data = wm8940; + codec->name = "WM8940"; + codec->owner = THIS_MODULE; + codec->read = wm8940_read_reg_cache; + codec->write = wm8940_write; + codec->bias_level = SND_SOC_BIAS_OFF; + codec->set_bias_level = wm8940_set_bias_level; + codec->dai = &wm8940_dai; + codec->num_dai = 1; + codec->reg_cache_size = ARRAY_SIZE(wm8940_reg_defaults); + codec->reg_cache = &wm8940->reg_cache; + + memcpy(codec->reg_cache, wm8940_reg_defaults, + sizeof(wm8940_reg_defaults)); + + ret = wm8940_reset(codec); + if (ret < 0) { + dev_err(codec->dev, "Failed to issue reset\n"); + return ret; + } + + wm8940_dai.dev = codec->dev; + + wm8940_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + ret = wm8940_write(codec, WM8940_POWER1, 0x180); + if (ret < 0) + return ret; + + if (!pdata) + dev_warn(codec->dev, "No platform data supplied\n"); + else { + reg = wm8940_read_reg_cache(codec, WM8940_OUTPUTCTL); + ret = wm8940_write(codec, WM8940_OUTPUTCTL, reg | pdata->vroi); + if (ret < 0) + return ret; + } + + + wm8940_codec = codec; + + ret = snd_soc_register_codec(codec); + if (ret) { + dev_err(codec->dev, "Failed to register codec: %d\n", ret); + return ret; + } + + ret = snd_soc_register_dai(&wm8940_dai); + if (ret) { + dev_err(codec->dev, "Failed to register DAI: %d\n", ret); + snd_soc_unregister_codec(codec); + return ret; + } + + return 0; +} + +static void wm8940_unregister(struct wm8940_priv *wm8940) +{ + wm8940_set_bias_level(&wm8940->codec, SND_SOC_BIAS_OFF); + snd_soc_unregister_dai(&wm8940_dai); + snd_soc_unregister_codec(&wm8940->codec); + kfree(wm8940); + wm8940_codec = NULL; +} + +static int wm8940_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct wm8940_priv *wm8940; + struct snd_soc_codec *codec; + + wm8940 = kzalloc(sizeof *wm8940, GFP_KERNEL); + if (wm8940 == NULL) + return -ENOMEM; + + codec = &wm8940->codec; + codec->hw_write = (hw_write_t)i2c_master_send; + i2c_set_clientdata(i2c, wm8940); + codec->control_data = i2c; + codec->dev = &i2c->dev; + + return wm8940_register(wm8940); +} + +static int wm8940_i2c_remove(struct i2c_client *client) +{ + struct wm8940_priv *wm8940 = i2c_get_clientdata(client); + + wm8940_unregister(wm8940); + + return 0; +} + +static const struct i2c_device_id wm8940_i2c_id[] = { + { "wm8940", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, wm8940_i2c_id); + +static struct i2c_driver wm8940_i2c_driver = { + .driver = { + .name = "WM8940 I2C Codec", + .owner = THIS_MODULE, + }, + .probe = wm8940_i2c_probe, + .remove = __devexit_p(wm8940_i2c_remove), + .id_table = wm8940_i2c_id, +}; + +static int __init wm8940_modinit(void) +{ + int ret; + + ret = i2c_add_driver(&wm8940_i2c_driver); + if (ret) + printk(KERN_ERR "Failed to register WM8940 I2C driver: %d\n", + ret); + return ret; +} +module_init(wm8940_modinit); + +static void __exit wm8940_exit(void) +{ + i2c_del_driver(&wm8940_i2c_driver); +} +module_exit(wm8940_exit); + +MODULE_DESCRIPTION("ASoC WM8940 driver"); +MODULE_AUTHOR("Jonathan Cameron"); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/wm8940.h b/sound/soc/codecs/wm8940.h new file mode 100644 index 0000000..8410eed --- /dev/null +++ b/sound/soc/codecs/wm8940.h @@ -0,0 +1,104 @@ +/* + * wm8940.h -- WM8940 Soc Audio driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _WM8940_H +#define _WM8940_H + +struct wm8940_setup_data { + /* Vref to analogue output resistance */ +#define WM8940_VROI_1K 0 +#define WM8940_VROI_30K 1 + unsigned int vroi:1; +}; +extern struct snd_soc_dai wm8940_dai; +extern struct snd_soc_codec_device soc_codec_dev_wm8940; + +/* WM8940 register space */ +#define WM8940_SOFTRESET 0x00 +#define WM8940_POWER1 0x01 +#define WM8940_POWER2 0x02 +#define WM8940_POWER3 0x03 +#define WM8940_IFACE 0x04 +#define WM8940_COMPANDINGCTL 0x05 +#define WM8940_CLOCK 0x06 +#define WM8940_ADDCNTRL 0x07 +#define WM8940_GPIO 0x08 +#define WM8940_CTLINT 0x09 +#define WM8940_DAC 0x0A +#define WM8940_DACVOL 0x0B + +#define WM8940_ADC 0x0E +#define WM8940_ADCVOL 0x0F +#define WM8940_NOTCH1 0x10 +#define WM8940_NOTCH2 0x11 +#define WM8940_NOTCH3 0x12 +#define WM8940_NOTCH4 0x13 +#define WM8940_NOTCH5 0x14 +#define WM8940_NOTCH6 0x15 +#define WM8940_NOTCH7 0x16 +#define WM8940_NOTCH8 0x17 +#define WM8940_DACLIM1 0x18 +#define WM8940_DACLIM2 0x19 + +#define WM8940_ALC1 0x20 +#define WM8940_ALC2 0x21 +#define WM8940_ALC3 0x22 +#define WM8940_NOISEGATE 0x23 +#define WM8940_PLLN 0x24 +#define WM8940_PLLK1 0x25 +#define WM8940_PLLK2 0x26 +#define WM8940_PLLK3 0x27 + +#define WM8940_ALC4 0x2A + +#define WM8940_INPUTCTL 0x2C +#define WM8940_PGAGAIN 0x2D + +#define WM8940_ADCBOOST 0x2F + +#define WM8940_OUTPUTCTL 0x31 +#define WM8940_SPKMIX 0x32 + +#define WM8940_SPKVOL 0x36 + +#define WM8940_MONOMIX 0x38 + +#define WM8940_CACHEREGNUM 0x57 + + +/* Clock divider Id's */ +#define WM8940_BCLKDIV 0 +#define WM8940_MCLKDIV 1 +#define WM8940_OPCLKDIV 2 + +/* MCLK clock dividers */ +#define WM8940_MCLKDIV_1 0 +#define WM8940_MCLKDIV_1_5 1 +#define WM8940_MCLKDIV_2 2 +#define WM8940_MCLKDIV_3 3 +#define WM8940_MCLKDIV_4 4 +#define WM8940_MCLKDIV_6 5 +#define WM8940_MCLKDIV_8 6 +#define WM8940_MCLKDIV_12 7 + +/* BCLK clock dividers */ +#define WM8940_BCLKDIV_1 0 +#define WM8940_BCLKDIV_2 1 +#define WM8940_BCLKDIV_4 2 +#define WM8940_BCLKDIV_8 3 +#define WM8940_BCLKDIV_16 4 +#define WM8940_BCLKDIV_32 5 + +/* PLL Out Dividers */ +#define WM8940_OPCLKDIV_1 0 +#define WM8940_OPCLKDIV_2 1 +#define WM8940_OPCLKDIV_3 2 +#define WM8940_OPCLKDIV_4 3 + +#endif /* _WM8940_H */ + -- cgit v0.10.2 From 9c935386512a3faa1be1c3d81cba38b7259a43f5 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 24 Apr 2009 15:00:25 +0200 Subject: ASoC: cs4270: fix Master Capture Switch polarity The control modifies the MUTE register, hence the polarity must be inverted. Signed-off-by: Daniel Mack Acked-By: Timur Tabi Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 7fa09a3..3c34fe6 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -486,7 +486,7 @@ static const struct snd_kcontrol_new cs4270_snd_controls[] = { SOC_SINGLE("Zero Cross Switch", CS4270_TRANS, 5, 1, 0), SOC_SINGLE("Popguard Switch", CS4270_MODE, 0, 1, 1), SOC_SINGLE("Auto-Mute Switch", CS4270_MUTE, 5, 1, 0), - SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 0) + SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 1) }; /* -- cgit v0.10.2 From 1a4ba05ec8369d62c10155a8931e81267bfbd31c Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 24 Apr 2009 16:37:45 +0200 Subject: ASoC: cs4270: add Master Playback Switch This adds a new control named 'Master Playback Switch' for cs4270 codecs. It is implemented using the new SOC_DOUBLE_EXT macro to catch the put function and store the information about manually set mute controls from userspace. When a manual mute is set, we don't want the soc core to un-mute the outputs. Renamed cs4270_mute() to cs4270_dai_mute() to avoid confusion. Signed-off-by: Daniel Mack Acked-by: Timur Tabi Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 3c34fe6..ece6ed6 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -109,6 +109,7 @@ struct cs4270_private { unsigned int mclk; /* Input frequency of the MCLK pin */ unsigned int mode; /* The mode (I2S or left-justified) */ unsigned int slave_mode; + unsigned int manual_mute; }; /** @@ -453,7 +454,7 @@ static int cs4270_hw_params(struct snd_pcm_substream *substream, } /** - * cs4270_mute - enable/disable the CS4270 external mute + * cs4270_dai_mute - enable/disable the CS4270 external mute * @dai: the SOC DAI * @mute: 0 = disable mute, 1 = enable mute * @@ -462,21 +463,52 @@ static int cs4270_hw_params(struct snd_pcm_substream *substream, * board does not have the MUTEA or MUTEB pins connected to such circuitry, * then this function will do nothing. */ -static int cs4270_mute(struct snd_soc_dai *dai, int mute) +static int cs4270_dai_mute(struct snd_soc_dai *dai, int mute) { struct snd_soc_codec *codec = dai->codec; + struct cs4270_private *cs4270 = codec->private_data; int reg6; reg6 = snd_soc_read(codec, CS4270_MUTE); if (mute) reg6 |= CS4270_MUTE_DAC_A | CS4270_MUTE_DAC_B; - else + else { reg6 &= ~(CS4270_MUTE_DAC_A | CS4270_MUTE_DAC_B); + reg6 |= cs4270->manual_mute; + } return snd_soc_write(codec, CS4270_MUTE, reg6); } +/** + * cs4270_soc_put_mute - put callback for the 'Master Playback switch' + * alsa control. + * @kcontrol: mixer control + * @ucontrol: control element information + * + * This function basically passes the arguments on to the generic + * snd_soc_put_volsw() function and saves the mute information in + * our private data structure. This is because we want to prevent + * cs4270_dai_mute() neglecting the user's decision to manually + * mute the codec's output. + * + * Returns 0 for success. + */ +static int cs4270_soc_put_mute(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct cs4270_private *cs4270 = codec->private_data; + int left = !ucontrol->value.integer.value[0]; + int right = !ucontrol->value.integer.value[1]; + + cs4270->manual_mute = (left ? CS4270_MUTE_DAC_A : 0) | + (right ? CS4270_MUTE_DAC_B : 0); + + return snd_soc_put_volsw(kcontrol, ucontrol); +} + /* A list of non-DAPM controls that the CS4270 supports */ static const struct snd_kcontrol_new cs4270_snd_controls[] = { SOC_DOUBLE_R("Master Playback Volume", @@ -486,7 +518,9 @@ static const struct snd_kcontrol_new cs4270_snd_controls[] = { SOC_SINGLE("Zero Cross Switch", CS4270_TRANS, 5, 1, 0), SOC_SINGLE("Popguard Switch", CS4270_MODE, 0, 1, 1), SOC_SINGLE("Auto-Mute Switch", CS4270_MUTE, 5, 1, 0), - SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 1) + SOC_DOUBLE("Master Capture Switch", CS4270_MUTE, 3, 4, 1, 1), + SOC_DOUBLE_EXT("Master Playback Switch", CS4270_MUTE, 0, 1, 1, 1, + snd_soc_get_volsw, cs4270_soc_put_mute), }; /* @@ -506,7 +540,7 @@ static struct snd_soc_dai_ops cs4270_dai_ops = { .hw_params = cs4270_hw_params, .set_sysclk = cs4270_set_dai_sysclk, .set_fmt = cs4270_set_dai_fmt, - .digital_mute = cs4270_mute, + .digital_mute = cs4270_dai_mute, }; struct snd_soc_dai cs4270_dai = { -- cgit v0.10.2 From 6be01cfb854818298753bfce65543dbc81d51d5a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 27 Apr 2009 20:57:42 +0100 Subject: ASoC: Staticise TLV values in WM8940 Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index 26987dc..a66dacc 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -168,16 +168,16 @@ static const char *wm8940_filter_mode_text[] = {"Audio", "Application"}; static const struct soc_enum wm8940_filter_mode_enum = SOC_ENUM_SINGLE(WM8940_ADC, 7, 2, wm8940_filter_mode_text); -DECLARE_TLV_DB_SCALE(wm8940_spk_vol_tlv, -5700, 100, 1); -DECLARE_TLV_DB_SCALE(wm8940_att_tlv, -1000, 1000, 0); -DECLARE_TLV_DB_SCALE(wm8940_pga_vol_tlv, -1200, 75, 0); -DECLARE_TLV_DB_SCALE(wm8940_alc_min_tlv, -1200, 600, 0); -DECLARE_TLV_DB_SCALE(wm8940_alc_max_tlv, 675, 600, 0); -DECLARE_TLV_DB_SCALE(wm8940_alc_tar_tlv, -2250, 50, 0); -DECLARE_TLV_DB_SCALE(wm8940_lim_boost_tlv, 0, 100, 0); -DECLARE_TLV_DB_SCALE(wm8940_lim_thresh_tlv, -600, 100, 0); -DECLARE_TLV_DB_SCALE(wm8940_adc_tlv, -12750, 50, 1); -DECLARE_TLV_DB_SCALE(wm8940_capture_boost_vol_tlv, 0, 2000, 0); +static DECLARE_TLV_DB_SCALE(wm8940_spk_vol_tlv, -5700, 100, 1); +static DECLARE_TLV_DB_SCALE(wm8940_att_tlv, -1000, 1000, 0); +static DECLARE_TLV_DB_SCALE(wm8940_pga_vol_tlv, -1200, 75, 0); +static DECLARE_TLV_DB_SCALE(wm8940_alc_min_tlv, -1200, 600, 0); +static DECLARE_TLV_DB_SCALE(wm8940_alc_max_tlv, 675, 600, 0); +static DECLARE_TLV_DB_SCALE(wm8940_alc_tar_tlv, -2250, 50, 0); +static DECLARE_TLV_DB_SCALE(wm8940_lim_boost_tlv, 0, 100, 0); +static DECLARE_TLV_DB_SCALE(wm8940_lim_thresh_tlv, -600, 100, 0); +static DECLARE_TLV_DB_SCALE(wm8940_adc_tlv, -12750, 50, 1); +static DECLARE_TLV_DB_SCALE(wm8940_capture_boost_vol_tlv, 0, 2000, 0); static const struct snd_kcontrol_new wm8940_snd_controls[] = { SOC_SINGLE("Digital Loopback Switch", WM8940_COMPANDINGCTL, @@ -253,7 +253,7 @@ static const struct snd_kcontrol_new wm8940_mono_mixer_controls[] = { SOC_DAPM_SINGLE("PCM Playback Switch", WM8940_MONOMIX, 0, 1, 0), }; -DECLARE_TLV_DB_SCALE(wm8940_boost_vol_tlv, -1500, 300, 1); +static DECLARE_TLV_DB_SCALE(wm8940_boost_vol_tlv, -1500, 300, 1); static const struct snd_kcontrol_new wm8940_input_boost_controls[] = { SOC_DAPM_SINGLE("Mic PGA Switch", WM8940_PGAGAIN, 6, 1, 1), SOC_DAPM_SINGLE_TLV("Aux Volume", WM8940_ADCBOOST, -- cgit v0.10.2 From ac2ff946a53e7bd0ae98f4e5d1d6c1b1dced82e5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 12:38:32 +0900 Subject: mg_disk: fix locking IRQ and timeout handlers call functions which expect locked queue lock without locking it. Fix it. While at it, convert 0s used as null pointer constant to NULLs. [ Impact: fix locking, cleanup ] Signed-off-by: Tejun Heo Cc: unsik Kim diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index fb39d9a..d3e72ad 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -160,11 +160,16 @@ static irqreturn_t mg_irq(int irq, void *dev_id) struct mg_host *host = dev_id; void (*handler)(struct mg_host *) = host->mg_do_intr; - host->mg_do_intr = 0; + spin_lock(&host->lock); + + host->mg_do_intr = NULL; del_timer(&host->timer); if (!handler) handler = mg_unexpected_intr; handler(host); + + spin_unlock(&host->lock); + return IRQ_HANDLED; } @@ -319,7 +324,7 @@ static void mg_read(struct request *req) remains = req->nr_sectors; - if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, 0) != + if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) != MG_ERR_NONE) mg_bad_rw_intr(host); @@ -363,7 +368,7 @@ static void mg_write(struct request *req) remains = req->nr_sectors; - if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, 0) != + if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; @@ -521,9 +526,11 @@ void mg_times_out(unsigned long data) char *name; struct request *req; + spin_lock_irq(&host->lock); + req = elv_next_request(host->breq); if (!req) - return; + goto out_unlock; host->mg_do_intr = NULL; @@ -534,6 +541,8 @@ void mg_times_out(unsigned long data) mg_bad_rw_intr(host); mg_request(host->breq); +out_unlock: + spin_unlock_irq(&host->lock); } static void mg_request_poll(struct request_queue *q) -- cgit v0.10.2 From 7090a0a97f55cbf47547a140fcc5a349f32c598c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Apr 2009 12:38:33 +0900 Subject: mg_disk: fix CONFIG_LBD=y warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/block/mg_disk.c: In function ‘mg_dump_status’: drivers/block/mg_disk.c:265: warning: format ‘%ld’ expects type ‘long int’, but argument 2 has type ‘sector_t’ [ Impact: kill build warning ] Cc: unsik Kim Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index d3e72ad..f389835 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -79,7 +79,7 @@ static void mg_dump_status(const char *msg, unsigned int stat, if (host->breq) { req = elv_next_request(host->breq); if (req) - printk(", sector=%ld", req->sector); + printk(", sector=%u", (u32)req->sector); } } -- cgit v0.10.2 From e93b9fb7d85da4fd9d5171649e5ddcac1dd572bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 12:38:33 +0900 Subject: hd: fix locking hd dance around local irq and HD_IRQ enable without achieving much. It ends up transferring data from irq handler with both local irq and HD_IRQ disabled. The only place it actually does something is while transferring the first block of a request which it does with HD_IRQ disabled but local irq enabled. Unfortunately, the dancing is horribly broken from locking POV. IRQ and timeout handlers access block queue without grabbing the queue lock and running the driver in SMP configuration crashes the whole machine pretty quickly. Remove meaningless irq enable/disable dancing and add proper locking in issue, irq and timeout paths. Signed-off-by: Tejun Heo diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 3c11f06..baaa9e4 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -509,7 +509,6 @@ ok_to_write: if (i > 0) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); - local_irq_enable(); } else { #if (HD_DELAY > 0) last_req = read_timer(); @@ -541,8 +540,7 @@ static void hd_times_out(unsigned long dummy) if (!CURRENT) return; - disable_irq(HD_IRQ); - local_irq_enable(); + spin_lock_irq(hd_queue->queue_lock); reset = 1; name = CURRENT->rq_disk->disk_name; printk("%s: timeout\n", name); @@ -552,9 +550,8 @@ static void hd_times_out(unsigned long dummy) #endif end_request(CURRENT, 0); } - local_irq_disable(); hd_request(); - enable_irq(HD_IRQ); + spin_unlock_irq(hd_queue->queue_lock); } static int do_special_op(struct hd_i_struct *disk, struct request *req) @@ -592,7 +589,6 @@ static void hd_request(void) return; repeat: del_timer(&device_timer); - local_irq_enable(); req = CURRENT; if (!req) { @@ -601,7 +597,6 @@ repeat: } if (reset) { - local_irq_disable(); reset_hd(); return; } @@ -660,9 +655,7 @@ repeat: static void do_hd_request(struct request_queue *q) { - disable_irq(HD_IRQ); hd_request(); - enable_irq(HD_IRQ); } static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -684,12 +677,16 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id) { void (*handler)(void) = do_hd; + spin_lock(hd_queue->queue_lock); + do_hd = NULL; del_timer(&device_timer); if (!handler) handler = unexpected_hd_interrupt; handler(); - local_irq_enable(); + + spin_unlock(hd_queue->queue_lock); + return IRQ_HANDLED; } -- cgit v0.10.2 From e686307fdc84f249490e6c9da92fcb2424491f14 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 17 Apr 2009 08:41:21 +0200 Subject: loop: use BIO list management functions Now that the bio list management stuff is generic, convert loop to use bio lists instead of its own private bio list implementation. Cc: Jens Axboe Cc: Christoph Hellwig Signed-off-by: Akinobu Mita Signed-off-by: Jens Axboe diff --git a/drivers/block/loop.c b/drivers/block/loop.c index ddae808..9ca4bb0 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -511,11 +511,7 @@ out: */ static void loop_add_bio(struct loop_device *lo, struct bio *bio) { - if (lo->lo_biotail) { - lo->lo_biotail->bi_next = bio; - lo->lo_biotail = bio; - } else - lo->lo_bio = lo->lo_biotail = bio; + bio_list_add(&lo->lo_bio_list, bio); } /* @@ -523,16 +519,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio) */ static struct bio *loop_get_bio(struct loop_device *lo) { - struct bio *bio; - - if ((bio = lo->lo_bio)) { - if (bio == lo->lo_biotail) - lo->lo_biotail = NULL; - lo->lo_bio = bio->bi_next; - bio->bi_next = NULL; - } - - return bio; + return bio_list_pop(&lo->lo_bio_list); } static int loop_make_request(struct request_queue *q, struct bio *old_bio) @@ -609,12 +596,13 @@ static int loop_thread(void *data) set_user_nice(current, -20); - while (!kthread_should_stop() || lo->lo_bio) { + while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) { wait_event_interruptible(lo->lo_event, - lo->lo_bio || kthread_should_stop()); + !bio_list_empty(&lo->lo_bio_list) || + kthread_should_stop()); - if (!lo->lo_bio) + if (bio_list_empty(&lo->lo_bio_list)) continue; spin_lock_irq(&lo->lo_lock); bio = loop_get_bio(lo); @@ -841,7 +829,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - lo->lo_bio = lo->lo_biotail = NULL; + bio_list_init(&lo->lo_bio_list); /* * set queue make_request_fn, and add limits based on lower level diff --git a/include/linux/bio.h b/include/linux/bio.h index 7b214fd..f37ca8c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -506,7 +506,7 @@ static inline int bio_has_data(struct bio *bio) } /* - * BIO list managment for use by remapping drivers (e.g. DM or MD). + * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * * A bio_list anchors a singly-linked list of bios chained through the bi_next * member of the bio. The bio_list also caches the last list member to allow diff --git a/include/linux/loop.h b/include/linux/loop.h index 4072544..66c194e 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -56,8 +56,7 @@ struct loop_device { gfp_t old_gfp_mask; spinlock_t lo_lock; - struct bio *lo_bio; - struct bio *lo_biotail; + struct bio_list lo_bio_list; int lo_state; struct mutex lo_ctl_mutex; struct task_struct *lo_thread; -- cgit v0.10.2 From 924cec7789f65ab7f022256f6533ecba0747b5f3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: block: clear req->errors on bio completion only for fs requests Impact: subtle behavior change For fs requests, rq is only carrier of bios and rq error status as a whole doesn't mean much. This is the reason why rq->errors is being cleared on each partial completion of a request as on each partial completion the error status is transferred to the respective bios. For pc requests, rq->errors is used to carry error status to the issuer and thus __end_that_request_first() doesn't clear it on such cases. The condition was fine till now as only fs and pc requests have used bio and thus the bio completion path. However, future changes will unify data accesses to bio and all non fs users care about rq error status. Clear rq->errors on bio completion only for fs requests. In general, the implicit clearing is a bit too subtle especially as the meaning of rq->errors is completely dependent on low level drivers. Unifying / cleaning up rq->errors usage and letting llds manage it would be better. TODO comment added. Signed-off-by: Tejun Heo Acked-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index 2998fe3..41bc0ff 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1741,10 +1741,14 @@ static int __end_that_request_first(struct request *req, int error, trace_block_rq_complete(req->q, req); /* - * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual - * sense key with us all the way through + * For fs requests, rq is just carrier of independent bio's + * and each partial completion should be handled separately. + * Reset per-request error on each partial completion. + * + * TODO: tj: This is too subtle. It would be better to let + * low level drivers do what they see fit. */ - if (!blk_pc_request(req)) + if (blk_fs_request(req)) req->errors = 0; if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { -- cgit v0.10.2 From 0de57fb93b1daaeaecb658a98b3299ae460c02e9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide-tape: remove back-to-back REQUEST_SENSE detection Impact: fix an oops which always triggers ide_tape_issue_pc() assumed drive->pc isn't NULL on invocation when checking for back-to-back request sense issues but drive->pc can be NULL and even when it's not NULL, it's not safe to dereference it once the previous command is complete because pc could have been freed or was on stack. Kill back-to-back REQUEST_SENSE detection. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index cb942a9..3a53e08 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -614,12 +614,6 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, { idetape_tape_t *tape = drive->driver_data; - if (drive->pc->c[0] == REQUEST_SENSE && - pc->c[0] == REQUEST_SENSE) { - printk(KERN_ERR "ide-tape: possible ide-tape.c bug - " - "Two request sense in serial were issued\n"); - } - if (drive->failed_pc == NULL && pc->c[0] != REQUEST_SENSE) drive->failed_pc = pc; -- cgit v0.10.2 From 220d06b5531e7b8a6226b2fdfb21198c3ccc4f76 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide: use blk_run_queue() instead of blk_start_queueing() blk_start_queueing() is being phased out in favor of [__]blk_run_queue(). Switch. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 310d03f..a914023 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) start_queue = 1; spin_unlock_irq(&hwif->lock); - if (start_queue) { - spin_lock_irq(q->queue_lock); - blk_start_queueing(q); - spin_unlock_irq(q->queue_lock); - } + if (start_queue) + blk_run_queue(q); return; } spin_unlock_irq(&hwif->lock); -- cgit v0.10.2 From b2963ac1738542d30305d7e12c8c078a383a425c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide: don't set REQ_SOFTBARRIER ide doesn't have to worry about REQ_SOFTBARRIER. Don't set it. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index a9fbe2c..c243880 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) cmd->protocol = ATA_PROT_NODATA; rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = cmd; } diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index c1c25eb..5991b23 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive) rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; - rq->cmd_flags |= REQ_SOFTBARRIER; if (blk_execute_rq(drive->queue, NULL, rq, 1)) ret = rq->errors; blk_put_request(rq); -- cgit v0.10.2 From 214ae19104141404f18ad6651752eb38e3dd584e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide kill unused ide_cmd->special Impact: removal of unused field No one uses ide_cmd->special anymore. Kill it. Signed-off-by: Tejun Heo diff --git a/include/linux/ide.h b/include/linux/ide.h index ff65fff..846a1e1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -324,7 +324,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ -- cgit v0.10.2 From 59a4f6f355fc718581ddcf1bb45a469d4756c035 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:41 +0900 Subject: ide-cd: clear sense buffer before issuing request sense Impact: code simplification ide_cd_request_sense_fixup() clears the tail of the sense buffer if the device didn't completely fill it. This patch makes cdrom_queue_request_sense() clear the sense buffer before issuing the command instead of clearing it afterwards. This simplifies code and eases future changes. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 3d4e099..b6a0d12 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -217,6 +217,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, if (sense == NULL) sense = &info->sense_data; + memset(sense, 0, 18); + /* stuff the sense request in front of our current request */ blk_rq_init(NULL, rq); rq->cmd_type = REQ_TYPE_ATA_PC; @@ -504,14 +506,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) * and some drives don't send them. Sigh. */ if (rq->cmd[0] == GPCMD_REQUEST_SENSE && - cmd->nleft > 0 && cmd->nleft <= 5) { - unsigned int ofs = cmd->nbytes - cmd->nleft; - - while (cmd->nleft > 0) { - *((u8 *)rq->data + ofs++) = 0; - cmd->nleft--; - } - } + cmd->nleft > 0 && cmd->nleft <= 5) + cmd->nleft = 0; } int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, -- cgit v0.10.2 From 8968932e54db35cf9d69cfbbd50c26dfaaa586c7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-floppy: block pc always uses bio Impact: remove unnecessary code path Block pc requests always use bio and rq->data is always NULL. No need to worry about !rq->bio cases in idefloppy_block_pc_cmd(). Note that ide-atapi uses ide_pio_bytes() for bio PIO transfer which handle sg fine. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 2b4868d..3b22e06 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -216,15 +216,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, ide_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - if (rq->data_len && rq_data_dir(rq) == WRITE) - pc->flags |= PC_FLAG_WRITING; - pc->buf = rq->data; - if (rq->bio) + if (rq->data_len) { pc->flags |= PC_FLAG_DMA_OK; - /* - * possibly problematic, doesn't look like ide-floppy correctly - * handled scattered requests if dma fails... - */ + if (rq_data_dir(rq) == WRITE) + pc->flags |= PC_FLAG_WRITING; + } + /* pio will be performed by ide_pio_bytes() which handles sg fine */ + pc->buf = NULL; pc->req_xfer = pc->buf_size = rq->data_len; } -- cgit v0.10.2 From d868ca24302e99a0e8a86071ca2c66273edf97d9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-taskfile: don't abuse rq->buffer Impact: rq->buffer usage cleanup ide_raw_taskfile() directly uses rq->buffer to carry pointer to the data buffer. This complicates both block interface and ide backend request handling. Use blk_rq_map_kern() instead and drop special handling for REQ_TYPE_ATA_TASKFILE from ide_map_sg(). Note that REQ_RW setting is moved upwards as blk_rq_map_kern() uses it to initialize bio rw flag. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 35dc38d..9b9e8b1 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -248,10 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) struct scatterlist *sg = hwif->sg_table; struct request *rq = cmd->rq; - if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { - sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE); - cmd->sg_nents = 1; - } else if (!rq->bio) { + if (!rq->bio) { sg_init_one(sg, rq->data, rq->data_len); cmd->sg_nents = 1; } else diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 4aa6223..f400eb4 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->buffer = buf; + + if (cmd->tf_flags & IDE_TFLAG_WRITE) + rq->cmd_flags |= REQ_RW; /* * (ks) We transfer currently only whole sectors. @@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, * if we would find a solution to transfer any size. * To support special commands like READ LONG. */ - rq->hard_nr_sectors = rq->nr_sectors = nsect; - rq->hard_cur_sectors = rq->current_nr_sectors = nsect; - - if (cmd->tf_flags & IDE_TFLAG_WRITE) - rq->cmd_flags |= REQ_RW; + if (nsect) { + error = blk_rq_map_kern(drive->queue, rq, buf, + nsect * SECTOR_SIZE, __GFP_WAIT); + if (error) + goto put_req; + } rq->special = cmd; cmd->rq = rq; error = blk_execute_rq(drive->queue, NULL, rq, 0); - blk_put_request(rq); +put_req: + blk_put_request(rq); return error; } -- cgit v0.10.2 From ac0b0113ddbab3ed2388132d368c97292f9f3c84 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: don't abuse rq->buffer Impact: rq->buffer usage cleanup ide-atapi uses rq->buffer as private opaque value for internal special requests. rq->special isn't used for these cases (the only case where rq->special is used is for ide-tape rw requests). Use rq->special instead. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 7201b17..2894577 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -90,7 +90,7 @@ static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, blk_rq_init(NULL, rq); rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd_flags |= REQ_PREEMPT; - rq->buffer = (char *)pc; + rq->special = (char *)pc; rq->rq_disk = disk; if (pc->req_xfer) { @@ -119,7 +119,7 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; - rq->buffer = (char *)pc; + rq->special = (char *)pc; if (pc->req_xfer) { rq->data = pc->buf; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 3b22e06..9460033 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -264,7 +264,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); } else if (blk_special_request(rq)) { - pc = (struct ide_atapi_pc *) rq->buffer; + pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; idefloppy_blockpc_cmd(floppy, pc, rq); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 3a53e08..aadf53c 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -828,7 +828,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, goto out; } if (rq->cmd[13] & REQ_IDETAPE_PC1) { - pc = (struct ide_atapi_pc *) rq->buffer; + pc = (struct ide_atapi_pc *)rq->special; rq->cmd[13] &= ~(REQ_IDETAPE_PC1); rq->cmd[13] |= REQ_IDETAPE_PC2; goto out; -- cgit v0.10.2 From 1f181d2b1569dfb88a584a6e1847e9e1c7645951 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd: don't abuse rq->buffer Impact: rq->buffer usage cleanup ide-cd uses rq->buffer to carry pointer to the original request when issuing REQUEST_SENSE. Use rq->special instead. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index b6a0d12..bb77c79 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -232,8 +232,8 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, rq->cmd_type = REQ_TYPE_SENSE; rq->cmd_flags |= REQ_PREEMPT; - /* NOTE! Save the failed command in "rq->buffer" */ - rq->buffer = (void *) failed_command; + /* NOTE! Save the failed command in "rq->special" */ + rq->special = (void *)failed_command; if (failed_command) ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x", @@ -247,10 +247,10 @@ static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For REQ_TYPE_SENSE, "rq->buffer" points to the original + * For REQ_TYPE_SENSE, "rq->special" points to the original * failed request */ - struct request *failed = (struct request *)rq->buffer; + struct request *failed = (struct request *)rq->special; struct cdrom_info *info = drive->driver_data; void *sense = &info->sense_data; -- cgit v0.10.2 From e69d800f7e8797a8e3423380ee9d8ca1cb90c388 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide: add helpers for preparing sense requests This is in preparation of removing the queueing of a sense request out of the IRQ handler path. Use struct request_sense as a general sense buffer for all ATAPI devices ide-{floppy,tape,cd}. tj: * blk_get_request(__GFP_WAIT) can't be called from do_request() as it can cause deadlock. Converted to use inline struct request and blk_rq_init(). * Added xfer / cdb len selection depending on device type. * All sense prep logics folded into ide_prep_sense() which never fails. * hwif->rq clearing and sense_rq used handling moved into ide_queue_sense_rq(). * blk_rq_map_kern() conversion is moved to later patch. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 2894577..c6e0348 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -191,6 +191,67 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); +void ide_prep_sense(ide_drive_t *drive, struct request *rq) +{ + struct request_sense *sense = &drive->sense_data; + struct request *sense_rq = &drive->sense_rq; + unsigned int cmd_len, sense_len; + + debug_log("%s: enter\n", __func__); + + switch (drive->media) { + case ide_floppy: + cmd_len = 255; + sense_len = 18; + break; + case ide_tape: + cmd_len = 20; + sense_len = 20; + break; + default: + cmd_len = 18; + sense_len = 18; + } + + BUG_ON(sense_len > sizeof(*sense)); + + if (blk_sense_request(rq) || drive->sense_rq_armed) + return; + + memset(sense, 0, sizeof(*sense)); + + blk_rq_init(rq->q, sense_rq); + sense_rq->rq_disk = rq->rq_disk; + + sense_rq->data = sense; + sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; + sense_rq->cmd[4] = cmd_len; + sense_rq->data_len = sense_len; + + sense_rq->cmd_type = REQ_TYPE_SENSE; + sense_rq->cmd_flags |= REQ_PREEMPT; + + if (drive->media == ide_tape) + sense_rq->cmd[13] = REQ_IDETAPE_PC1; + + drive->sense_rq_armed = true; +} +EXPORT_SYMBOL_GPL(ide_prep_sense); + +void ide_queue_sense_rq(ide_drive_t *drive, void *special) +{ + BUG_ON(!drive->sense_rq_armed); + + drive->sense_rq.special = special; + drive->sense_rq_armed = false; + + drive->hwif->rq = NULL; + + elv_add_request(drive->queue, &drive->sense_rq, + ELEVATOR_INSERT_FRONT, 0); +} +EXPORT_SYMBOL_GPL(ide_queue_sense_rq); + /* * Called when an error was detected during the last packet command. * We queue a request sense packet command in the head of the request list. diff --git a/include/linux/ide.h b/include/linux/ide.h index 846a1e1..a69ccac 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include #include +/* for request_sense */ +#include + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -602,6 +605,11 @@ struct ide_drive_s { struct ide_atapi_pc request_sense_pc; struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1175,6 +1183,9 @@ int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +void ide_queue_sense_rq(ide_drive_t *drive, void *special); + int ide_cd_expiry(ide_drive_t *); int ide_cd_get_xferlen(struct request *); -- cgit v0.10.2 From c457ce874a0f3dfa3d5e9f2309789f6f34e24325 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd: convert to using generic sense request Preallocate a sense request in the ->do_request method and reinitialize it only on demand, in case it's been consumed in the IRQ handler path. The reason for this is that we don't want to be mapping rq to bio in the IRQ path and introduce all kinds of unnecessary hacks to the block layer. tj: * Both user and kernel PC requests expect sense data to be stored in separate storage other than drive->sense_data. Copy sense data to rq->sense on completion if rq->sense is not NULL. This fixes bogus sense data on PC requests. As a result, remove cdrom_queue_request_sense. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index bb77c79..061d7bb 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -206,44 +206,6 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, ide_cd_log_error(drive->name, failed_command, sense); } -static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, - struct request *failed_command) -{ - struct cdrom_info *info = drive->driver_data; - struct request *rq = &drive->request_sense_rq; - - ide_debug_log(IDE_DBG_SENSE, "enter"); - - if (sense == NULL) - sense = &info->sense_data; - - memset(sense, 0, 18); - - /* stuff the sense request in front of our current request */ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_ATA_PC; - rq->rq_disk = info->disk; - - rq->data = sense; - rq->cmd[0] = GPCMD_REQUEST_SENSE; - rq->cmd[4] = 18; - rq->data_len = 18; - - rq->cmd_type = REQ_TYPE_SENSE; - rq->cmd_flags |= REQ_PREEMPT; - - /* NOTE! Save the failed command in "rq->special" */ - rq->special = (void *)failed_command; - - if (failed_command) - ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x", - failed_command->cmd[0]); - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* @@ -251,11 +213,16 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) * failed request */ struct request *failed = (struct request *)rq->special; - struct cdrom_info *info = drive->driver_data; - void *sense = &info->sense_data; + struct request_sense *sense = &drive->sense_data; if (failed) { if (failed->sense) { + /* + * Sense is always read into drive->sense_data. + * Copy back if the failed request has its + * sense pointer set. + */ + memcpy(failed->sense, sense, 18); sense = failed->sense; failed->sense_len = rq->sense_len; } @@ -431,7 +398,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - cdrom_queue_request_sense(drive, NULL, NULL); + ide_queue_sense_rq(drive, NULL); return 1; end_request: @@ -445,7 +412,7 @@ end_request: hwif->rq = NULL; - cdrom_queue_request_sense(drive, rq->sense, rq); + ide_queue_sense_rq(drive, rq); return 1; } else return 2; @@ -893,6 +860,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, goto out_end; } + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index 1d97101..93a3cf1b 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -87,10 +87,6 @@ struct cdrom_info { struct atapi_toc *toc; - /* The result of the last successful request sense command - on this device. */ - struct request_sense sense_data; - u8 max_speed; /* Max speed of the drive. */ u8 current_speed; /* Current speed of the drive. */ -- cgit v0.10.2 From 068753203e6cd085664a62e0fc0636e19b148a12 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-atapi: convert ide-{floppy,tape} to using preallocated sense buffer Since we're issuing REQ_TYPE_SENSE now we need to allow those types of rqs in the ->do_request callbacks. As a future improvement, sense_len assignment might be unified across all ATAPI devices. Borislav to check with specs and test. As a result, get rid of ide_queue_pc_head() and drive->request_sense_rq. tj: * Init request sense ide_atapi_pc from sense request. In the longer timer, it would probably better to fold ide_create_request_sense_cmd() into its only current user - ide_floppy_get_format_progress(). * ide_retry_pc() no longer takes @disk. CC: Bartlomiej Zolnierkiewicz CC: FUJITA Tomonori Signed-off-by: Borislav Petkov Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index c6e0348..972c522 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -80,34 +80,6 @@ void ide_init_pc(struct ide_atapi_pc *pc) EXPORT_SYMBOL_GPL(ide_init_pc); /* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. - */ -static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, struct request *rq) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->special = (char *)pc; - rq->rq_disk = disk; - - if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; - } - - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - -/* * Add a special packet command request to the tail of the request queue, * and wait for it to be serviced. */ @@ -254,18 +226,26 @@ EXPORT_SYMBOL_GPL(ide_queue_sense_rq); /* * Called when an error was detected during the last packet command. - * We queue a request sense packet command in the head of the request list. + * We queue a request sense packet command at the head of the request + * queue. */ -void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) +void ide_retry_pc(ide_drive_t *drive) { - struct request *rq = &drive->request_sense_rq; + struct request *sense_rq = &drive->sense_rq; struct ide_atapi_pc *pc = &drive->request_sense_pc; (void)ide_read_error(drive); - ide_create_request_sense_cmd(drive, pc); + + /* init pc from sense_rq */ + ide_init_pc(pc); + memcpy(pc->c, sense_rq->cmd, 12); + pc->buf = sense_rq->data; + pc->req_xfer = sense_rq->data_len; + if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_pc_head(drive, disk, pc, rq); + + ide_queue_sense_rq(drive, pc); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -404,7 +384,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("[cmd %x]: check condition\n", rq->cmd[0]); /* Retry operation */ - ide_retry_pc(drive, rq->rq_disk); + ide_retry_pc(drive); /* queued, but not started */ return ide_stopped; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 9460033..d3302cc 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -263,7 +263,7 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); - } else if (blk_special_request(rq)) { + } else if (blk_special_request(rq) || blk_sense_request(rq)) { pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; @@ -273,6 +273,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, goto out_end; } + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index aadf53c..8324dfa 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -695,7 +695,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) printk(KERN_ERR "ide-tape: %s: I/O error, ", tape->name); /* Retry operation */ - ide_retry_pc(drive, tape->disk); + ide_retry_pc(drive); return ide_stopped; } pc->error = 0; @@ -752,7 +752,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); - if (!blk_special_request(rq)) { + if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " "request queue (%d)\n", drive->name, rq->cmd_type); @@ -840,6 +840,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, BUG(); out: + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/include/linux/ide.h b/include/linux/ide.h index a69ccac..9e67cca 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -604,7 +604,6 @@ struct ide_drive_s { unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; /* current sense rq and buffer */ bool sense_rq_armed; @@ -1181,7 +1180,7 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); void ide_queue_sense_rq(ide_drive_t *drive, void *special); -- cgit v0.10.2 From 02e7cf8f848841ca21864ccd019e480b73c323b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:42 +0900 Subject: ide-cd,atapi: use bio for internal commands Impact: unify request data buffer handling rq->data is used mostly to pass kernel buffer through request queue without using bio. There are only a couple of places which still do this in kernel and converting to bio isn't difficult. This patch converts ide-cd and atapi to use bio instead of rq->data for request sense and internal pc commands. With previous change to unify sense request handling, this is relatively easily achieved by adding blk_rq_map_kern() during sense_rq prep and PC issue. If blk_rq_map_kern() fails for sense, the error is deferred till sense issue and aborts the failed command which triggered the sense. Note that this is a slim possibility as sense prep is done on each command issue, so for the above condition to actually trigger, all preps since the last sense issue till the issue of the request which would require a sense should fail. * do_request functions might sleep now. This should be okay as ide request_fn - do_ide_request() - is invoked only from make_request and plug work. Make sure this is the case by adding might_sleep() to do_ide_request(). * Functions which access the read sense data before the sense request is complete now should access bio_data(sense_rq->bio) as the sense buffer might have been copied during blk_rq_map_kern(). * ide-tape updated to map sg. * cdrom_do_block_pc() now doesn't have to deal with REQ_TYPE_ATA_PC special case. Simplified. * tp_ops->output/input_data path dropped from ide_pc_intr(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 972c522..5cefe12 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -94,16 +94,18 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq->special = (char *)pc; if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; + error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer, + GFP_NOIO); + if (error) + goto put_req; } memcpy(rq->cmd, pc->c, 12); if (drive->media == ide_tape) rq->cmd[13] = REQ_IDETAPE_PC1; error = blk_execute_rq(drive->queue, disk, rq, 0); +put_req: blk_put_request(rq); - return error; } EXPORT_SYMBOL_GPL(ide_queue_pc_tail); @@ -168,6 +170,7 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) struct request_sense *sense = &drive->sense_data; struct request *sense_rq = &drive->sense_rq; unsigned int cmd_len, sense_len; + int err; debug_log("%s: enter\n", __func__); @@ -193,13 +196,19 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) memset(sense, 0, sizeof(*sense)); blk_rq_init(rq->q, sense_rq); - sense_rq->rq_disk = rq->rq_disk; - sense_rq->data = sense; + err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, + GFP_NOIO); + if (unlikely(err)) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: failed to map sense buffer\n", + drive->name); + return; + } + + sense_rq->rq_disk = rq->rq_disk; sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; sense_rq->cmd[4] = cmd_len; - sense_rq->data_len = sense_len; - sense_rq->cmd_type = REQ_TYPE_SENSE; sense_rq->cmd_flags |= REQ_PREEMPT; @@ -210,9 +219,14 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq) } EXPORT_SYMBOL_GPL(ide_prep_sense); -void ide_queue_sense_rq(ide_drive_t *drive, void *special) +int ide_queue_sense_rq(ide_drive_t *drive, void *special) { - BUG_ON(!drive->sense_rq_armed); + /* deferred failure from ide_prep_sense() */ + if (!drive->sense_rq_armed) { + printk(KERN_WARNING "%s: failed queue sense request\n", + drive->name); + return -ENOMEM; + } drive->sense_rq.special = special; drive->sense_rq_armed = false; @@ -221,6 +235,7 @@ void ide_queue_sense_rq(ide_drive_t *drive, void *special) elv_add_request(drive->queue, &drive->sense_rq, ELEVATOR_INSERT_FRONT, 0); + return 0; } EXPORT_SYMBOL_GPL(ide_queue_sense_rq); @@ -239,13 +254,14 @@ void ide_retry_pc(ide_drive_t *drive) /* init pc from sense_rq */ ide_init_pc(pc); memcpy(pc->c, sense_rq->cmd, 12); - pc->buf = sense_rq->data; + pc->buf = bio_data(sense_rq->bio); /* pointer to mapped address */ pc->req_xfer = sense_rq->data_len; if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_sense_rq(drive, pc); + if (ide_queue_sense_rq(drive, pc)) + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -317,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xferfunc; unsigned int timeout, done; u16 bcount; u8 stat, ireason, dsc = 0; @@ -411,7 +426,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape) + if (drive->media == ide_tape && !rq->bio) done = ide_rq_bytes(rq); /* FIXME */ else done = blk_rq_bytes(rq); @@ -448,16 +463,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - xferfunc = write ? tp_ops->output_data : tp_ops->input_data; - - if (drive->media == ide_floppy && pc->buf == NULL) { + if (drive->media == ide_tape && pc->bh) + done = drive->pc_io_buffers(drive, pc, bcount, write); + else { done = min_t(unsigned int, bcount, cmd->nleft); ide_pio_bytes(drive, cmd, write, done); - } else if (drive->media == ide_tape && pc->bh) { - done = drive->pc_io_buffers(drive, pc, bcount, write); - } else { - done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); - xferfunc(drive, NULL, pc->cur_pos, done); } /* Update the current position */ diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 061d7bb..6736287 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -210,10 +210,12 @@ static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* * For REQ_TYPE_SENSE, "rq->special" points to the original - * failed request + * failed request. Also, the sense data should be read + * directly from rq which might be different from the original + * sense buffer if it got copied during mapping. */ struct request *failed = (struct request *)rq->special; - struct request_sense *sense = &drive->sense_data; + void *sense = bio_data(rq->bio); if (failed) { if (failed->sense) { @@ -398,7 +400,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - ide_queue_sense_rq(drive, NULL); + return ide_queue_sense_rq(drive, NULL) ? 2 : 1; return 1; end_request: @@ -412,8 +414,7 @@ end_request: hwif->rq = NULL; - ide_queue_sense_rq(drive, rq); - return 1; + return ide_queue_sense_rq(drive, rq) ? 2 : 1; } else return 2; } @@ -507,8 +508,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, rq->cmd_flags |= cmd_flags; rq->timeout = timeout; if (buffer) { - rq->data = buffer; - rq->data_len = *bufflen; + error = blk_rq_map_kern(drive->queue, rq, buffer, + *bufflen, GFP_NOIO); + if (error) { + blk_put_request(rq); + return error; + } } error = blk_execute_rq(drive->queue, info->disk, rq, 0); @@ -802,15 +807,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) drive->dma = 0; /* sg request */ - if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + if (rq->bio) { struct request_queue *q = drive->queue; + char *buf = bio_data(rq->bio); unsigned int alignment; - char *buf; - - if (rq->bio) - buf = bio_data(rq->bio); - else - buf = rq->data; drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 9b9e8b1..3245c2d 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -481,6 +481,9 @@ void do_ide_request(struct request_queue *q) spin_unlock_irq(q->queue_lock); + /* HLD do_request() callback might sleep, make sure it's okay */ + might_sleep(); + if (ide_lock_host(host, hwif)) goto plug_device_2; diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 8324dfa..9b762a2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -850,6 +850,9 @@ out: cmd.rq = rq; + ide_init_sg_cmd(&cmd, pc->req_xfer); + ide_map_sg(drive, &cmd); + return ide_tape_issue_pc(drive, &cmd, pc); } diff --git a/include/linux/ide.h b/include/linux/ide.h index 9e67cca..1957461 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1183,7 +1183,7 @@ void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); void ide_retry_pc(ide_drive_t *drive); void ide_prep_sense(ide_drive_t *drive, struct request *rq); -void ide_queue_sense_rq(ide_drive_t *drive, void *special); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); -- cgit v0.10.2 From 765139ef5f1a4b1d5cb1f1a7a12de7ee61f6500f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 07:00:43 +0900 Subject: ide-pm: don't abuse rq->data Impact: cleanup rq->data usage ide-pm uses rq->data to carry pointer to struct request_pm_state through request queue and rq->special is used to carray pointer to local struct ide_cmd, which isn't necessary. Use rq->special for request_pm_state instead and use local ide_cmd in ide_start_power_step(). Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 3245c2d..6e3094e 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -368,7 +368,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 0d8a151..ba1488b 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int ret; /* call ACPI _GTM only once */ @@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_acpi_get_timing(hwif); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_SUSPEND; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int err; /* call ACPI _PS0 / _STM only once */ @@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev) ide_acpi_exec_tfs(drive); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_RESUME; rq->cmd_flags |= REQ_PREEMPT; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; @@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; - struct ide_cmd *cmd = rq->special; - - memset(cmd, 0, sizeof(*cmd)); + struct request_pm_state *pm = rq->special; + struct ide_cmd cmd = { }; switch (pm->pm_step) { case IDE_PM_FLUSH_CACHE: /* Suspend step 1 (flush cache) */ @@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; } if (ata_id_flush_ext_enabled(drive->id)) - cmd->tf.command = ATA_CMD_FLUSH_EXT; + cmd.tf.command = ATA_CMD_FLUSH_EXT; else - cmd->tf.command = ATA_CMD_FLUSH; + cmd.tf.command = ATA_CMD_FLUSH; goto out_do_tf; case IDE_PM_STANDBY: /* Suspend step 2 (standby) */ - cmd->tf.command = ATA_CMD_STANDBYNOW1; + cmd.tf.command = ATA_CMD_STANDBYNOW1; goto out_do_tf; case IDE_PM_RESTORE_PIO: /* Resume step 1 (restore PIO) */ ide_set_max_pio(drive); @@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) ide_complete_power_step(drive, rq); return ide_stopped; case IDE_PM_IDLE: /* Resume step 2 (idle) */ - cmd->tf.command = ATA_CMD_IDLEIMMEDIATE; + cmd.tf.command = ATA_CMD_IDLEIMMEDIATE; goto out_do_tf; case IDE_PM_RESTORE_DMA: /* Resume step 3 (restore DMA) */ /* @@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; out_do_tf: - cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; - cmd->valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; - cmd->protocol = ATA_PROT_NODATA; + cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; + cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; + cmd.protocol = ATA_PROT_NODATA; - return do_rw_taskfile(drive, cmd); + return do_rw_taskfile(drive, &cmd); } /** @@ -181,7 +173,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; unsigned long flags; ide_complete_power_step(drive, rq); @@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; if (blk_pm_suspend_request(rq) && pm->pm_step == IDE_PM_START_SUSPEND) -- cgit v0.10.2 From 08f370f0a2fb223bf48d0bfa2a173be0393c19dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape,floppy: fix failed command completion after request sense Impact: fix infinite retry loop After a command failed, ide-tape and floppy inserts REQUEST_SENSE in front of the failed command and according to the result, sets pc->retries, flags and errors. After REQUEST_SENSE is complete, the failed command is again at the front of the queue and if the verdict was to terminate the request, the issue functions tries to complete it directly by calling drive->pc_callback() and returning ide_stopped. However, drive->pc_callback() doesn't complete a request. It only prepares for completion of the request. As a result, this creates an infinite loop where the failed request is retried perpetually. Fix it by actually ending the request by calling ide_complete_rq(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index d3302cc..d20704a 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -141,6 +141,7 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive, drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); return ide_stopped; } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 9b762a2..2b9a136 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -643,6 +643,7 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, } drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); return ide_stopped; } debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); -- cgit v0.10.2 From eb6a61bb9543aa54d62595e27206b3d3c0293bcc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-atapi,tape,floppy: allow ->pc_callback() to change rq->data_len Impact: allow residual count implementation in ->pc_callback() rq->data_len has two duties - carrying the number of input bytes on issue and carrying residual count back to the issuer on completion. ide-atapi completion callback ->pc_callback() is the right place to do this but currently ide-atapi depends on rq->data_len carrying the original request size after calling ->pc_callback() to complete the pc request. This patch makes ide_pc_intr(), ide_tape_issue_pc() and ide_floppy_issue_pc() cache length to complete before calling ->pc_callback() so that it can modify rq->data_len as necessary. Note: As using rq->data_len for two purposes can make cases like this incorrect in subtle ways, future changes will introduce separate field for residual count. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 5cefe12..3df5442 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -409,6 +409,16 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0) dsc = 1; + /* + * ->pc_callback() might change rq->data_len for + * residual count, cache total length. + */ + if (!blk_special_request(rq) && + (drive->media == ide_tape && !rq->bio)) + done = ide_rq_bytes(rq); /* FIXME */ + else + done = blk_rq_bytes(rq); + /* Command finished - Call the callback function */ uptodate = drive->pc_callback(drive, dsc); @@ -417,7 +427,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (blk_special_request(rq)) { rq->errors = 0; - done = blk_rq_bytes(rq); error = 0; } else { @@ -426,11 +435,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape && !rq->bio) - done = ide_rq_bytes(rq); /* FIXME */ - else - done = blk_rq_bytes(rq); - error = uptodate ? 0 : -EIO; } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index d20704a..537b7c5 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -134,14 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive, drive->pc = pc; if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR)) ide_floppy_report_error(floppy, pc); + /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; drive->failed_pc = NULL; drive->pc_callback(drive, 0); - ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2b9a136..8226d52 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -622,6 +622,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, if (pc->retries > IDETAPE_MAX_PC_RETRIES || (pc->flags & PC_FLAG_ABORT)) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + /* * We will "abort" retrying a packet command in case legitimate * error code was received (crossing a filemark, or end of the @@ -641,9 +643,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; } + drive->failed_pc = NULL; drive->pc_callback(drive, 0); - ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); -- cgit v0.10.2 From 7b13354eeaabaf6283b8c669a7d67d104ce7c638 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: use single continuous buffer Impact: simpler buffer allocation and handling, kills OOM, fix DMA transfers ide-tape has its own multiple buffer mechanism using struct idetape_bh. It allocates buffer with decreasing order-of-two allocations so that it results in minimum number of segments. However, the implementation is quite complex and works in a way that no other block or ide driver works necessitating a lot of special case handling. The benefit this complex allocation scheme brings is questionable as PIO or DMA the number of segments (16 maximum) doesn't make any noticeable difference and it also doesn't negate the need for multiple order allocation which can fail under memory pressure or high fragmentation although it does lower the highest order necessary by one when the buffer size isn't power of two. As the first step to remove the custom buffer management, this patch makes ide-tape allocate single continous buffer. The maximum order is four. I doubt the change would cause any trouble but if it ever matters, it should be converted to regular sg mechanism like everyone else and even in that case dropping custom buffer handling and moving to standard mechanism first make sense as an intermediate step. This patch makes the first bh to contain the whole buffer and drops multi bh handling code. Following patches will make further changes. This patch has the side effect of killing OOM triggered by allocation path and fixing DMA transfers. Previously, bug in alloc path triggered OOM on command issue and commands were passed to DMA engine without DMA-mapping all the segments. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 8226d52..b2afbc7 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -134,7 +134,6 @@ enum { struct idetape_bh { u32 b_size; atomic_t b_count; - struct idetape_bh *b_reqnext; char *b_data; }; @@ -228,10 +227,6 @@ typedef struct ide_tape_obj { char *b_data; int b_count; - int pages_per_buffer; - /* Wasted space in each stage */ - int excess_bh_size; - /* Measures average tape speed */ unsigned long avg_time; int avg_size; @@ -303,9 +298,7 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, struct idetape_bh *bh = pc->bh; int count; - while (bcount) { - if (bh == NULL) - break; + if (bcount && bh) { count = min( (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), bcount); @@ -313,15 +306,10 @@ static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, atomic_read(&bh->b_count), count); bcount -= count; atomic_add(count, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } + if (atomic_read(&bh->b_count) == bh->b_size) + pc->bh = NULL; } - pc->bh = bh; - return bcount; } @@ -331,22 +319,14 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, struct idetape_bh *bh = pc->bh; int count; - while (bcount) { - if (bh == NULL) - break; + if (bcount && bh) { count = min((unsigned int)pc->b_count, (unsigned int)bcount); drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count); bcount -= count; pc->b_data += count; pc->b_count -= count; - if (!pc->b_count) { - bh = bh->b_reqnext; - pc->bh = bh; - if (bh) { - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); - } - } + if (!pc->b_count) + pc->bh = NULL; } return bcount; @@ -355,24 +335,20 @@ static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) { struct idetape_bh *bh = pc->bh; - int count; unsigned int bcount = pc->xferred; if (pc->flags & PC_FLAG_WRITING) return; - while (bcount) { - if (bh == NULL) { + if (bcount) { + if (bh == NULL || bcount > bh->b_size) { printk(KERN_ERR "ide-tape: bh == NULL in %s\n", __func__); return; } - count = min((unsigned int)bh->b_size, (unsigned int)bcount); - atomic_set(&bh->b_count, count); + atomic_set(&bh->b_count, bcount); if (atomic_read(&bh->b_count) == bh->b_size) - bh = bh->b_reqnext; - bcount -= count; + pc->bh = NULL; } - pc->bh = bh; } /* @@ -439,24 +415,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) /* Free data buffers completely. */ static void ide_tape_kfree_buffer(idetape_tape_t *tape) { - struct idetape_bh *prev_bh, *bh = tape->merge_bh; - - while (bh) { - u32 size = bh->b_size; - - while (size) { - unsigned int order = fls(size >> PAGE_SHIFT)-1; - - if (bh->b_data) - free_pages((unsigned long)bh->b_data, order); + struct idetape_bh *bh = tape->merge_bh; - size &= (order-1); - bh->b_data += (1 << order) * PAGE_SIZE; - } - prev_bh = bh; - bh = bh->b_reqnext; - kfree(prev_bh); - } + kfree(bh->b_data); + kfree(bh); } static void ide_tape_handle_dsc(ide_drive_t *); @@ -861,117 +823,50 @@ out: } /* - * The function below uses __get_free_pages to allocate a data buffer of size - * tape->buffer_size (or a bit more). We attempt to combine sequential pages as - * much as possible. - * - * It returns a pointer to the newly allocated buffer, or NULL in case of - * failure. + * It returns a pointer to the newly allocated buffer, or NULL in case + * of failure. */ static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape, - int full, int clear) -{ - struct idetape_bh *prev_bh, *bh, *merge_bh; - int pages = tape->pages_per_buffer; - unsigned int order, b_allocd; - char *b_data = NULL; - - merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - bh = merge_bh; - if (bh == NULL) - goto abort; - - order = fls(pages) - 1; - bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!bh->b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - pages &= (order-1); - - if (clear) - memset(bh->b_data, 0, b_allocd); - bh->b_reqnext = NULL; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); + int full) +{ + struct idetape_bh *bh; - while (pages) { - order = fls(pages) - 1; - b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - - if (clear) - memset(b_data, 0, b_allocd); - - /* newly allocated page frames below buffer header or ...*/ - if (bh->b_data == b_data + b_allocd) { - bh->b_size += b_allocd; - bh->b_data -= b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - /* they are above the header */ - if (b_data == bh->b_data + bh->b_size) { - bh->b_size += b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - prev_bh = bh; - bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - if (!bh) { - free_pages((unsigned long) b_data, order); - goto abort; - } - bh->b_reqnext = NULL; - bh->b_data = b_data; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - prev_bh->b_reqnext = bh; + bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); + if (!bh) + return NULL; - pages &= (order-1); + bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!bh->b_data) { + kfree(bh); + return NULL; } - bh->b_size -= tape->excess_bh_size; - if (full) - atomic_sub(tape->excess_bh_size, &bh->b_count); - return merge_bh; -abort: - ide_tape_kfree_buffer(tape); - return NULL; + bh->b_size = tape->buffer_size; + atomic_set(&bh->b_count, full ? bh->b_size : 0); + + return bh; } static int idetape_copy_stage_from_user(idetape_tape_t *tape, const char __user *buf, int n) { struct idetape_bh *bh = tape->bh; - int count; int ret = 0; - while (n) { - if (bh == NULL) { + if (n) { + if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) { printk(KERN_ERR "ide-tape: bh == NULL in %s\n", __func__); return 1; } - count = min((unsigned int) - (bh->b_size - atomic_read(&bh->b_count)), - (unsigned int)n); if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf, - count)) + n)) ret = 1; - n -= count; - atomic_add(count, &bh->b_count); - buf += count; - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } + atomic_add(n, &bh->b_count); + if (atomic_read(&bh->b_count) == bh->b_size) + tape->bh = NULL; } - tape->bh = bh; + return ret; } @@ -979,30 +874,20 @@ static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf, int n) { struct idetape_bh *bh = tape->bh; - int count; int ret = 0; - while (n) { - if (bh == NULL) { + if (n) { + if (bh == NULL || n > tape->b_count) { printk(KERN_ERR "ide-tape: bh == NULL in %s\n", __func__); return 1; } - count = min(tape->b_count, n); - if (copy_to_user(buf, tape->b_data, count)) + if (copy_to_user(buf, tape->b_data, n)) ret = 1; - n -= count; - tape->b_data += count; - tape->b_count -= count; - buf += count; - if (!tape->b_count) { - bh = bh->b_reqnext; - tape->bh = bh; - if (bh) { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } - } + tape->b_data += n; + tape->b_count -= n; + if (!tape->b_count) + tape->bh = NULL; } return ret; } @@ -1254,7 +1139,7 @@ static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - int blocks, min; + int blocks; struct idetape_bh *bh; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { @@ -1269,31 +1154,16 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) if (tape->merge_bh_size) { blocks = tape->merge_bh_size / tape->blk_size; if (tape->merge_bh_size % tape->blk_size) { - unsigned int i; - + unsigned int i = tape->blk_size - + tape->merge_bh_size % tape->blk_size; blocks++; - i = tape->blk_size - tape->merge_bh_size % - tape->blk_size; - bh = tape->bh->b_reqnext; - while (bh) { - atomic_set(&bh->b_count, 0); - bh = bh->b_reqnext; - } bh = tape->bh; - while (i) { - if (bh == NULL) { - printk(KERN_INFO "ide-tape: bug," - " bh NULL\n"); - break; - } - min = min(i, (unsigned int)(bh->b_size - - atomic_read(&bh->b_count))); + if (bh) { memset(bh->b_data + atomic_read(&bh->b_count), - 0, min); - atomic_add(min, &bh->b_count); - i -= min; - bh = bh->b_reqnext; - } + 0, i); + atomic_add(i, &bh->b_count); + } else + printk(KERN_INFO "ide-tape: bug, bh NULL\n"); } (void) idetape_add_chrdev_write_request(drive, blocks); tape->merge_bh_size = 0; @@ -1321,7 +1191,7 @@ static int idetape_init_read(ide_drive_t *drive) " 0 now\n"); tape->merge_bh_size = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); + tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); if (!tape->merge_bh) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_READ; @@ -1368,23 +1238,18 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; - struct idetape_bh *bh; + struct idetape_bh *bh = tape->merge_bh; int blocks; while (bcount) { unsigned int count; - bh = tape->merge_bh; count = min(tape->buffer_size, bcount); bcount -= count; blocks = count / tape->blk_size; - while (count) { - atomic_set(&bh->b_count, - min(count, (unsigned int)bh->b_size)); - memset(bh->b_data, 0, atomic_read(&bh->b_count)); - count -= atomic_read(&bh->b_count); - bh = bh->b_reqnext; - } + atomic_set(&bh->b_count, count); + memset(bh->b_data, 0, atomic_read(&bh->b_count)); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks, tape->merge_bh); } @@ -1596,7 +1461,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, "should be 0 now\n"); tape->merge_bh_size = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); + tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); if (!tape->merge_bh) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_WRITE; @@ -1970,7 +1835,7 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor) idetape_tape_t *tape = drive->driver_data; ide_tape_flush_merge_buffer(drive); - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0); + tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1); if (tape->merge_bh != NULL) { idetape_pad_zeros(drive, tape->blk_size * (tape->user_bs_factor - 1)); @@ -2201,11 +2066,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) tape->buffer_size = *ctl * tape->blk_size; } buffer_size = tape->buffer_size; - tape->pages_per_buffer = buffer_size / PAGE_SIZE; - if (buffer_size % PAGE_SIZE) { - tape->pages_per_buffer++; - tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE; - } /* select the "best" DSC read/write polling freq */ speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]); -- cgit v0.10.2 From e998f30b45efb99a3c3ce7b5483f76317a17abed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: use standard data transfer mechanism Impact: use standard way to transfer data ide-tape uses rq in an interesting way. For r/w requests, rq->special is used to carry a private buffer management structure idetape_bh and rq->nr_sectors and current_nr_sectors are initialized to the number of idetape blocks which isn't necessary 512 bytes. Also, rq->current_nr_sectors is used to report back the residual count in units of idetape blocks. This peculiarity taxes both block layer and ide. ide-atapi has different paths and hooks to accomodate it and what a rq means becomes quite confusing and making changes at the block layer becomes quite difficult and error-prone. This patch makes ide-tape use bio instead. With the previous patch, ide-tape currently is using single contiguos buffer so replacing it isn't difficult. Data buffer is mapped into bio using blk_rq_map_kern() in idetape_queue_rw_tail(). idetape_io_buffers() and idetape_update_buffers() are dropped and pc->bh is set to null to tell ide-atapi to use standard data transfer mechanism and idetape_bh byte counts are updated by the issuer on completion using the residual count. This change also nicely removes the FIXME in ide_pc_intr() where ide-tape rqs need to be completed using ide_rq_bytes() instead of blk_rq_bytes() (although this didn't really matter as the request didn't have bio). Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 3df5442..b9dd450 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -413,11 +413,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) * ->pc_callback() might change rq->data_len for * residual count, cache total length. */ - if (!blk_special_request(rq) && - (drive->media == ide_tape && !rq->bio)) - done = ide_rq_bytes(rq); /* FIXME */ - else - done = blk_rq_bytes(rq); + done = blk_rq_bytes(rq); /* Command finished - Call the callback function */ uptodate = drive->pc_callback(drive, dsc); diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index b2afbc7..b373ac8 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -292,65 +292,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) return tape; } -static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - if (bcount && bh) { - count = min( - (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), - bcount); - drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data + - atomic_read(&bh->b_count), count); - bcount -= count; - atomic_add(count, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) - pc->bh = NULL; - } - - return bcount; -} - -static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - if (bcount && bh) { - count = min((unsigned int)pc->b_count, (unsigned int)bcount); - drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count); - bcount -= count; - pc->b_data += count; - pc->b_count -= count; - if (!pc->b_count) - pc->bh = NULL; - } - - return bcount; -} - -static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - struct idetape_bh *bh = pc->bh; - unsigned int bcount = pc->xferred; - - if (pc->flags & PC_FLAG_WRITING) - return; - if (bcount) { - if (bh == NULL || bcount > bh->b_size) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return; - } - atomic_set(&bh->b_count, bcount); - if (atomic_read(&bh->b_count) == bh->b_size) - pc->bh = NULL; - } -} - /* * called on each failed packet command retry to analyze the request sense. We * currently do not utilize this information. @@ -368,12 +309,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) pc->c[0], tape->sense_key, tape->asc, tape->ascq); /* Correct pc->xferred by asking the tape. */ - if (pc->flags & PC_FLAG_DMA_ERROR) { + if (pc->flags & PC_FLAG_DMA_ERROR) pc->xferred = pc->req_xfer - tape->blk_size * get_unaligned_be32(&sense[3]); - idetape_update_buffers(drive, pc); - } /* * If error was the result of a zero-length read or write command, @@ -458,7 +397,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) } tape->first_frame += blocks; - rq->current_nr_sectors -= blocks; + rq->data_len -= blocks * tape->blk_size; if (pc->error) { uptodate = 0; @@ -520,19 +459,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive) idetape_postpone_request(drive); } -static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - unsigned int bleft; - - if (write) - bleft = idetape_output_buffers(drive, pc, bcount); - else - bleft = idetape_input_buffers(drive, pc, bcount); - - return bcount - bleft; -} - /* * Packet Command Interface * @@ -683,7 +609,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = bh; + pc->bh = NULL; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; @@ -1063,10 +989,12 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, struct idetape_bh *bh) { idetape_tape_t *tape = drive->driver_data; + size_t size = blocks * tape->blk_size; struct request *rq; - int ret, errors; + int ret; debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); + BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; @@ -1074,21 +1002,29 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, rq->rq_disk = tape->disk; rq->special = (void *)bh; rq->sector = tape->first_frame; - rq->nr_sectors = blocks; - rq->current_nr_sectors = blocks; + + if (size) { + ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size, + __GFP_WAIT); + if (ret) + goto out_put; + } + blk_execute_rq(drive->queue, tape->disk, rq, 0); - errors = rq->errors; - ret = tape->blk_size * (blocks - rq->current_nr_sectors); - blk_put_request(rq); + /* calculate the number of transferred bytes and update bh */ + size -= rq->data_len; + if (cmd == REQ_IDETAPE_READ) + atomic_add(size, &bh->b_count); - if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0) - return 0; + ret = size; + if (rq->errors == IDE_DRV_ERROR_GENERAL) + ret = -EIO; if (tape->merge_bh) idetape_init_merge_buffer(tape); - if (errors == IDE_DRV_ERROR_GENERAL) - return -EIO; +out_put: + blk_put_request(rq); return ret; } @@ -2034,8 +1970,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) u16 *ctl = (u16 *)&tape->caps[12]; drive->pc_callback = ide_tape_callback; - drive->pc_update_buffers = idetape_update_buffers; - drive->pc_io_buffers = ide_tape_io_buffers; drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP; -- cgit v0.10.2 From 6cf3d545f7d71b183e5b89960d4cc850a42c410d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: kill idetape_bh Impact: kill now unnecessary idetape_bh With everything using standard mechanisms, there is no need for idetape_bh anymore. Kill it and use tape->buf, cur and valid to describe data buffer instead. Changes worth mentioning are... * idetape_queue_rq_tail() now always queue tape->buf and and adjusts buffer state properly before completion. * idetape_pad_zeros() clears the buffer only once. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index b373ac8..d2e9c09 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -131,12 +131,6 @@ enum { IDETAPE_DIR_WRITE = (1 << 2), }; -struct idetape_bh { - u32 b_size; - atomic_t b_count; - char *b_data; -}; - /* Tape door status */ #define DOOR_UNLOCKED 0 #define DOOR_LOCKED 1 @@ -218,14 +212,12 @@ typedef struct ide_tape_obj { /* Data buffer size chosen based on the tape's recommendation */ int buffer_size; - /* merge buffer */ - struct idetape_bh *merge_bh; - /* size of the merge buffer */ - int merge_bh_size; - /* pointer to current buffer head within the merge buffer */ - struct idetape_bh *bh; - char *b_data; - int b_count; + /* Staging buffer of buffer_size bytes */ + void *buf; + /* The read/write cursor */ + void *cur; + /* The number of valid bytes in buf */ + size_t valid; /* Measures average tape speed */ unsigned long avg_time; @@ -351,15 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) } } -/* Free data buffers completely. */ -static void ide_tape_kfree_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *bh = tape->merge_bh; - - kfree(bh->b_data); - kfree(bh); -} - static void ide_tape_handle_dsc(ide_drive_t *); static int ide_tape_callback(ide_drive_t *drive, int dsc) @@ -603,8 +586,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, struct ide_atapi_pc *pc, struct request *rq, u8 opcode) { - struct idetape_bh *bh = (struct idetape_bh *)rq->special; - unsigned int length = rq->current_nr_sectors; + unsigned int length = rq->nr_sectors; ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); @@ -616,14 +598,11 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, if (pc->req_xfer == tape->buffer_size) pc->flags |= PC_FLAG_DMA_OK; - if (opcode == READ_6) { + if (opcode == READ_6) pc->c[0] = READ_6; - atomic_set(&bh->b_count, 0); - } else if (opcode == WRITE_6) { + else if (opcode == WRITE_6) { pc->c[0] = WRITE_6; pc->flags |= PC_FLAG_WRITING; - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); } memcpy(rq->cmd, pc->c, 12); @@ -639,10 +618,8 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, struct ide_cmd cmd; u8 stat; - debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu," - " current_nr_sectors: %u\n", - (unsigned long long)rq->sector, rq->nr_sectors, - rq->current_nr_sectors); + debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n" + (unsigned long long)rq->sector, rq->nr_sectors); if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ @@ -749,89 +726,6 @@ out: } /* - * It returns a pointer to the newly allocated buffer, or NULL in case - * of failure. - */ -static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape, - int full) -{ - struct idetape_bh *bh; - - bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - if (!bh) - return NULL; - - bh->b_data = kmalloc(tape->buffer_size, GFP_KERNEL); - if (!bh->b_data) { - kfree(bh); - return NULL; - } - - bh->b_size = tape->buffer_size; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - - return bh; -} - -static int idetape_copy_stage_from_user(idetape_tape_t *tape, - const char __user *buf, int n) -{ - struct idetape_bh *bh = tape->bh; - int ret = 0; - - if (n) { - if (bh == NULL || n > bh->b_size - atomic_read(&bh->b_count)) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf, - n)) - ret = 1; - atomic_add(n, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) - tape->bh = NULL; - } - - return ret; -} - -static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf, - int n) -{ - struct idetape_bh *bh = tape->bh; - int ret = 0; - - if (n) { - if (bh == NULL || n > tape->b_count) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - if (copy_to_user(buf, tape->b_data, n)) - ret = 1; - tape->b_data += n; - tape->b_count -= n; - if (!tape->b_count) - tape->bh = NULL; - } - return ret; -} - -static void idetape_init_merge_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *bh = tape->merge_bh; - tape->bh = tape->merge_bh; - - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) - atomic_set(&bh->b_count, 0); - else { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } -} - -/* * Write a filemark if write_filemark=1. Flush the device buffers without * writing a filemark otherwise. */ @@ -928,10 +822,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive) return; clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags); - tape->merge_bh_size = 0; - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + tape->valid = 0; + if (tape->buf != NULL) { + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; @@ -985,8 +879,7 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive, * Generate a read/write request for the block device interface and wait for it * to be serviced. */ -static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, - struct idetape_bh *bh) +static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks) { idetape_tape_t *tape = drive->driver_data; size_t size = blocks * tape->blk_size; @@ -1000,11 +893,10 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[13] = cmd; rq->rq_disk = tape->disk; - rq->special = (void *)bh; rq->sector = tape->first_frame; if (size) { - ret = blk_rq_map_kern(drive->queue, rq, bh->b_data, size, + ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, __GFP_WAIT); if (ret) goto out_put; @@ -1012,17 +904,17 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, blk_execute_rq(drive->queue, tape->disk, rq, 0); - /* calculate the number of transferred bytes and update bh */ + /* calculate the number of transferred bytes and update buffer state */ size -= rq->data_len; + tape->cur = tape->buf; if (cmd == REQ_IDETAPE_READ) - atomic_add(size, &bh->b_count); + tape->valid = size; + else + tape->valid = 0; ret = size; if (rq->errors == IDE_DRV_ERROR_GENERAL) ret = -EIO; - - if (tape->merge_bh) - idetape_init_merge_buffer(tape); out_put: blk_put_request(rq); return ret; @@ -1064,49 +956,33 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) /* Queue up a character device originated write request. */ static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) { - idetape_tape_t *tape = drive->driver_data; - debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, - blocks, tape->merge_bh); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks); } static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; int blocks; - struct idetape_bh *bh; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer" " but we are not writing.\n"); return; } - if (tape->merge_bh_size > tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n"); - tape->merge_bh_size = tape->buffer_size; - } - if (tape->merge_bh_size) { - blocks = tape->merge_bh_size / tape->blk_size; - if (tape->merge_bh_size % tape->blk_size) { - unsigned int i = tape->blk_size - - tape->merge_bh_size % tape->blk_size; + if (tape->buf) { + blocks = tape->valid / tape->blk_size; + if (tape->valid % tape->blk_size) { blocks++; - bh = tape->bh; - if (bh) { - memset(bh->b_data + atomic_read(&bh->b_count), - 0, i); - atomic_add(i, &bh->b_count); - } else - printk(KERN_INFO "ide-tape: bug, bh NULL\n"); + memset(tape->buf + tape->valid, 0, + tape->blk_size - tape->valid % tape->blk_size); } (void) idetape_add_chrdev_write_request(drive, blocks); - tape->merge_bh_size = 0; } - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + if (tape->buf != NULL) { + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; } @@ -1122,15 +998,15 @@ static int idetape_init_read(ide_drive_t *drive) ide_tape_flush_merge_buffer(drive); idetape_flush_tape_buffers(drive); } - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size should be" - " 0 now\n"); - tape->merge_bh_size = 0; + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); - if (!tape->merge_bh) + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_READ; + tape->cur = tape->buf; /* * Issue a read 0 command to ensure that DSC handshake is @@ -1140,11 +1016,10 @@ static int idetape_init_read(ide_drive_t *drive) */ if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { bytes_read = idetape_queue_rw_tail(drive, - REQ_IDETAPE_READ, 0, - tape->merge_bh); + REQ_IDETAPE_READ, 0); if (bytes_read < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; tape->chrdev_dir = IDETAPE_DIR_NONE; return bytes_read; } @@ -1157,8 +1032,6 @@ static int idetape_init_read(ide_drive_t *drive) /* called from idetape_chrdev_read() to service a chrdev read request. */ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) { - idetape_tape_t *tape = drive->driver_data; - debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks); /* If we are at a filemark, return a read length of 0 */ @@ -1167,27 +1040,21 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) idetape_init_read(drive); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks, - tape->merge_bh); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks); } static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; - struct idetape_bh *bh = tape->merge_bh; - int blocks; + + memset(tape->buf, 0, tape->buffer_size); while (bcount) { - unsigned int count; + unsigned int count = min(tape->buffer_size, bcount); - count = min(tape->buffer_size, bcount); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, + count / tape->blk_size); bcount -= count; - blocks = count / tape->blk_size; - atomic_set(&bh->b_count, count); - memset(bh->b_data, 0, atomic_read(&bh->b_count)); - - idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks, - tape->merge_bh); } } @@ -1267,7 +1134,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, } if (tape->chrdev_dir == IDETAPE_DIR_READ) { - tape->merge_bh_size = 0; + tape->valid = 0; if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) ++count; ide_tape_discard_merge_buffer(drive, 0); @@ -1333,20 +1200,20 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, return rc; if (count == 0) return (0); - if (tape->merge_bh_size) { - actually_read = min((unsigned int)(tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_to_user(tape, buf, actually_read)) + if (tape->valid) { + actually_read = min_t(unsigned int, tape->valid, count); + if (copy_to_user(buf, tape->cur, actually_read)) ret = -EFAULT; buf += actually_read; - tape->merge_bh_size -= actually_read; count -= actually_read; + tape->cur += actually_read; + tape->valid -= actually_read; } while (count >= tape->buffer_size) { bytes_read = idetape_add_chrdev_read_request(drive, ctl); if (bytes_read <= 0) goto finish; - if (idetape_copy_stage_to_user(tape, buf, bytes_read)) + if (copy_to_user(buf, tape->cur, bytes_read)) ret = -EFAULT; buf += bytes_read; count -= bytes_read; @@ -1357,10 +1224,11 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, if (bytes_read <= 0) goto finish; temp = min((unsigned long)count, (unsigned long)bytes_read); - if (idetape_copy_stage_to_user(tape, buf, temp)) + if (copy_to_user(buf, tape->cur, temp)) ret = -EFAULT; actually_read += temp; - tape->merge_bh_size = bytes_read-temp; + tape->cur += temp; + tape->valid -= temp; } finish: if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { @@ -1392,16 +1260,15 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { if (tape->chrdev_dir == IDETAPE_DIR_READ) ide_tape_discard_merge_buffer(drive, 1); - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size " - "should be 0 now\n"); - tape->merge_bh_size = 0; + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0); - if (!tape->merge_bh) + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) return -ENOMEM; tape->chrdev_dir = IDETAPE_DIR_WRITE; - idetape_init_merge_buffer(tape); + tape->cur = tape->buf; /* * Issue a write 0 command to ensure that DSC handshake is @@ -1411,11 +1278,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, */ if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { ssize_t retval = idetape_queue_rw_tail(drive, - REQ_IDETAPE_WRITE, 0, - tape->merge_bh); + REQ_IDETAPE_WRITE, 0); if (retval < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; tape->chrdev_dir = IDETAPE_DIR_NONE; return retval; } @@ -1423,23 +1289,19 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, } if (count == 0) return (0); - if (tape->merge_bh_size) { - if (tape->merge_bh_size >= tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge buf too big\n"); - tape->merge_bh_size = 0; - } - actually_written = min((unsigned int) - (tape->buffer_size - tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_from_user(tape, buf, actually_written)) - ret = -EFAULT; + if (tape->valid < tape->buffer_size) { + actually_written = min_t(unsigned int, + tape->buffer_size - tape->valid, + count); + if (copy_from_user(tape->cur, buf, actually_written)) + ret = -EFAULT; buf += actually_written; - tape->merge_bh_size += actually_written; count -= actually_written; + tape->cur += actually_written; + tape->valid += actually_written; - if (tape->merge_bh_size == tape->buffer_size) { + if (tape->valid == tape->buffer_size) { ssize_t retval; - tape->merge_bh_size = 0; retval = idetape_add_chrdev_write_request(drive, ctl); if (retval <= 0) return (retval); @@ -1447,7 +1309,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, } while (count >= tape->buffer_size) { ssize_t retval; - if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size)) + if (copy_from_user(tape->cur, buf, tape->buffer_size)) ret = -EFAULT; buf += tape->buffer_size; count -= tape->buffer_size; @@ -1458,9 +1320,10 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, } if (count) { actually_written += count; - if (idetape_copy_stage_from_user(tape, buf, count)) + if (copy_from_user(tape->cur, buf, count)) ret = -EFAULT; - tape->merge_bh_size += count; + tape->cur += count; + tape->valid += count; } return ret ? ret : actually_written; } @@ -1623,7 +1486,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file, idetape_flush_tape_buffers(drive); } if (cmd == MTIOCGET || cmd == MTIOCPOS) { - block_offset = tape->merge_bh_size / + block_offset = tape->valid / (tape->blk_size * tape->user_bs_factor); position = idetape_read_position(drive); if (position < 0) @@ -1771,12 +1634,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor) idetape_tape_t *tape = drive->driver_data; ide_tape_flush_merge_buffer(drive); - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1); - if (tape->merge_bh != NULL) { + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (tape->buf != NULL) { idetape_pad_zeros(drive, tape->blk_size * (tape->user_bs_factor - 1)); - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; } idetape_write_filemark(drive); idetape_flush_tape_buffers(drive); @@ -2042,7 +1905,7 @@ static void ide_tape_release(struct device *dev) ide_drive_t *drive = tape->drive; struct gendisk *g = tape->disk; - BUG_ON(tape->merge_bh_size); + BUG_ON(tape->valid); drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP; drive->driver_data = NULL; -- cgit v0.10.2 From 3596b66452491a3cff26256a5e6e6061a66c4142 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:02 +0900 Subject: ide-tape: unify r/w init paths Impact: cleanup Read and write init paths are almost identical. Unify them into idetape_init_rw(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d2e9c09..4da7fa9 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -987,42 +987,50 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) tape->chrdev_dir = IDETAPE_DIR_NONE; } -static int idetape_init_read(ide_drive_t *drive) +static int idetape_init_rw(ide_drive_t *drive, int dir) { idetape_tape_t *tape = drive->driver_data; - int bytes_read; + int rc; - /* Initialize read operation */ - if (tape->chrdev_dir != IDETAPE_DIR_READ) { - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { - ide_tape_flush_merge_buffer(drive); - idetape_flush_tape_buffers(drive); - } - if (tape->buf || tape->valid) { - printk(KERN_ERR "ide-tape: valid should be 0 now\n"); - tape->valid = 0; - } - tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); - if (!tape->buf) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_READ; - tape->cur = tape->buf; + BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE); - /* - * Issue a read 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. - * No point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - bytes_read = idetape_queue_rw_tail(drive, - REQ_IDETAPE_READ, 0); - if (bytes_read < 0) { - kfree(tape->buf); - tape->buf = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return bytes_read; - } + if (tape->chrdev_dir == dir) + return 0; + + if (tape->chrdev_dir == IDETAPE_DIR_READ) + ide_tape_discard_merge_buffer(drive, 1); + else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { + ide_tape_flush_merge_buffer(drive); + idetape_flush_tape_buffers(drive); + } + + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; + } + + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) + return -ENOMEM; + tape->chrdev_dir = dir; + tape->cur = tape->buf; + + /* + * Issue a 0 rw command to ensure that DSC handshake is + * switched from completion mode to buffer available mode. No + * point in issuing this if DSC overlap isn't supported, some + * drives (Seagate STT3401A) will return an error. + */ + if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { + int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ + : REQ_IDETAPE_WRITE; + + rc = idetape_queue_rw_tail(drive, cmd, 0); + if (rc < 0) { + kfree(tape->buf); + tape->buf = NULL; + tape->chrdev_dir = IDETAPE_DIR_NONE; + return rc; } } @@ -1038,7 +1046,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) return 0; - idetape_init_read(drive); + idetape_init_rw(drive, IDETAPE_DIR_READ); return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks); } @@ -1195,7 +1203,7 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, (count % tape->blk_size) == 0) tape->user_bs_factor = count / tape->blk_size; } - rc = idetape_init_read(drive); + rc = idetape_init_rw(drive, IDETAPE_DIR_READ); if (rc < 0) return rc; if (count == 0) @@ -1249,6 +1257,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, ssize_t actually_written = 0; ssize_t ret = 0; u16 ctl = *(u16 *)&tape->caps[12]; + int rc; /* The drive is write protected. */ if (tape->write_prot) @@ -1257,36 +1266,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); /* Initialize write operation */ - if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { - if (tape->chrdev_dir == IDETAPE_DIR_READ) - ide_tape_discard_merge_buffer(drive, 1); - if (tape->buf || tape->valid) { - printk(KERN_ERR "ide-tape: valid should be 0 now\n"); - tape->valid = 0; - } - tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); - if (!tape->buf) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_WRITE; - tape->cur = tape->buf; - - /* - * Issue a write 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. No - * point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - ssize_t retval = idetape_queue_rw_tail(drive, - REQ_IDETAPE_WRITE, 0); - if (retval < 0) { - kfree(tape->buf); - tape->buf = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return retval; - } - } - } + rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE); + if (rc < 0) + return rc; if (count == 0) return (0); if (tape->valid < tape->buffer_size) { -- cgit v0.10.2 From 71294cf93d22ceaa75448cc6ebee2c65897be8c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-tape: use byte size instead of sectors on rw issue functions Impact: cleanup Byte size is what most issue functions deal with, make idetape_queue_rw_tail() and its wrappers take byte size instead of sector counts. idetape_chrdev_read() and write() functions are converted to use tape->buffer_size instead of ctl from tape->cap. This cleans up code a little bit and will ease the next r/w reimplementation. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 4da7fa9..d5e9bb2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -879,15 +879,15 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive, * Generate a read/write request for the block device interface and wait for it * to be serviced. */ -static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks) +static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) { idetape_tape_t *tape = drive->driver_data; - size_t size = blocks * tape->blk_size; struct request *rq; int ret; debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); + BUG_ON(size < 0 || size % tape->blk_size); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; @@ -954,17 +954,16 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) } /* Queue up a character device originated write request. */ -static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) +static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size) { debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size); } static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - int blocks; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer" @@ -972,15 +971,10 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) return; } if (tape->buf) { - blocks = tape->valid / tape->blk_size; - if (tape->valid % tape->blk_size) { - blocks++; - memset(tape->buf + tape->valid, 0, - tape->blk_size - tape->valid % tape->blk_size); - } - (void) idetape_add_chrdev_write_request(drive, blocks); - } - if (tape->buf != NULL) { + size_t aligned = roundup(tape->valid, tape->blk_size); + + memset(tape->cur, 0, aligned - tape->valid); + idetape_add_chrdev_write_request(drive, aligned); kfree(tape->buf); tape->buf = NULL; } @@ -1038,9 +1032,9 @@ static int idetape_init_rw(ide_drive_t *drive, int dir) } /* called from idetape_chrdev_read() to service a chrdev read request. */ -static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) +static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size) { - debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks); + debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size); /* If we are at a filemark, return a read length of 0 */ if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) @@ -1048,7 +1042,7 @@ static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) idetape_init_rw(drive, IDETAPE_DIR_READ); - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks); + return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size); } static void idetape_pad_zeros(ide_drive_t *drive, int bcount) @@ -1060,8 +1054,7 @@ static void idetape_pad_zeros(ide_drive_t *drive, int bcount) while (bcount) { unsigned int count = min(tape->buffer_size, bcount); - idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, - count / tape->blk_size); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count); bcount -= count; } } @@ -1193,7 +1186,6 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, ide_drive_t *drive = tape->drive; ssize_t bytes_read, temp, actually_read = 0, rc; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); @@ -1218,7 +1210,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, tape->valid -= actually_read; } while (count >= tape->buffer_size) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); + bytes_read = idetape_add_chrdev_read_request(drive, + tape->buffer_size); if (bytes_read <= 0) goto finish; if (copy_to_user(buf, tape->cur, bytes_read)) @@ -1228,7 +1221,8 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, actually_read += bytes_read; } if (count) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); + bytes_read = idetape_add_chrdev_read_request(drive, + tape->buffer_size); if (bytes_read <= 0) goto finish; temp = min((unsigned long)count, (unsigned long)bytes_read); @@ -1256,7 +1250,6 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, ide_drive_t *drive = tape->drive; ssize_t actually_written = 0; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; int rc; /* The drive is write protected. */ @@ -1284,7 +1277,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, if (tape->valid == tape->buffer_size) { ssize_t retval; - retval = idetape_add_chrdev_write_request(drive, ctl); + retval = idetape_add_chrdev_write_request(drive, + tape->buffer_size); if (retval <= 0) return (retval); } @@ -1295,7 +1289,8 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, ret = -EFAULT; buf += tape->buffer_size; count -= tape->buffer_size; - retval = idetape_add_chrdev_write_request(drive, ctl); + retval = idetape_add_chrdev_write_request(drive, + tape->buffer_size); actually_written += tape->buffer_size; if (retval <= 0) return (retval); -- cgit v0.10.2 From 4344d07fb8dbf0cbfec1f7d7c1afeccaceaaa120 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-tape: simplify read/write functions Impact: cleanup idetape_chrdev_read/write() functions are unnecessarily complex when everything can be handled in a single loop. Collapse idetape_add_chrdev_read/write_request() into the rw functions and simplify the implementation. Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d5e9bb2..2599579 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -953,14 +953,6 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) pc->flags |= PC_FLAG_WAIT_FOR_DSC; } -/* Queue up a character device originated write request. */ -static int idetape_add_chrdev_write_request(ide_drive_t *drive, int size) -{ - debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, size); -} - static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; @@ -974,7 +966,7 @@ static void ide_tape_flush_merge_buffer(ide_drive_t *drive) size_t aligned = roundup(tape->valid, tape->blk_size); memset(tape->cur, 0, aligned - tape->valid); - idetape_add_chrdev_write_request(drive, aligned); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned); kfree(tape->buf); tape->buf = NULL; } @@ -1031,20 +1023,6 @@ static int idetape_init_rw(ide_drive_t *drive, int dir) return 0; } -/* called from idetape_chrdev_read() to service a chrdev read request. */ -static int idetape_add_chrdev_read_request(ide_drive_t *drive, int size) -{ - debug_log(DBG_PROCS, "Enter %s, %d bytes\n", __func__, size); - - /* If we are at a filemark, return a read length of 0 */ - if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) - return 0; - - idetape_init_rw(drive, IDETAPE_DIR_READ); - - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, size); -} - static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; @@ -1184,8 +1162,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t bytes_read, temp, actually_read = 0, rc; + size_t done = 0; ssize_t ret = 0; + int rc; debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); @@ -1195,52 +1174,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, (count % tape->blk_size) == 0) tape->user_bs_factor = count / tape->blk_size; } + rc = idetape_init_rw(drive, IDETAPE_DIR_READ); if (rc < 0) return rc; - if (count == 0) - return (0); - if (tape->valid) { - actually_read = min_t(unsigned int, tape->valid, count); - if (copy_to_user(buf, tape->cur, actually_read)) - ret = -EFAULT; - buf += actually_read; - count -= actually_read; - tape->cur += actually_read; - tape->valid -= actually_read; - } - while (count >= tape->buffer_size) { - bytes_read = idetape_add_chrdev_read_request(drive, - tape->buffer_size); - if (bytes_read <= 0) - goto finish; - if (copy_to_user(buf, tape->cur, bytes_read)) - ret = -EFAULT; - buf += bytes_read; - count -= bytes_read; - actually_read += bytes_read; - } - if (count) { - bytes_read = idetape_add_chrdev_read_request(drive, - tape->buffer_size); - if (bytes_read <= 0) - goto finish; - temp = min((unsigned long)count, (unsigned long)bytes_read); - if (copy_to_user(buf, tape->cur, temp)) + + while (done < count) { + size_t todo; + + /* refill if staging buffer is empty */ + if (!tape->valid) { + /* If we are at a filemark, nothing more to read */ + if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) + break; + /* read */ + if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, + tape->buffer_size) <= 0) + break; + } + + /* copy out */ + todo = min_t(size_t, count - done, tape->valid); + if (copy_to_user(buf + done, tape->cur, todo)) ret = -EFAULT; - actually_read += temp; - tape->cur += temp; - tape->valid -= temp; + + tape->cur += todo; + tape->valid -= todo; + done += todo; } -finish: - if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { + + if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name); idetape_space_over_filemarks(drive, MTFSF, 1); return 0; } - return ret ? ret : actually_read; + return ret ? ret : done; } static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, @@ -1248,7 +1218,7 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t actually_written = 0; + size_t done = 0; ssize_t ret = 0; int rc; @@ -1262,47 +1232,28 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE); if (rc < 0) return rc; - if (count == 0) - return (0); - if (tape->valid < tape->buffer_size) { - actually_written = min_t(unsigned int, - tape->buffer_size - tape->valid, - count); - if (copy_from_user(tape->cur, buf, actually_written)) - ret = -EFAULT; - buf += actually_written; - count -= actually_written; - tape->cur += actually_written; - tape->valid += actually_written; - - if (tape->valid == tape->buffer_size) { - ssize_t retval; - retval = idetape_add_chrdev_write_request(drive, - tape->buffer_size); - if (retval <= 0) - return (retval); - } - } - while (count >= tape->buffer_size) { - ssize_t retval; - if (copy_from_user(tape->cur, buf, tape->buffer_size)) - ret = -EFAULT; - buf += tape->buffer_size; - count -= tape->buffer_size; - retval = idetape_add_chrdev_write_request(drive, - tape->buffer_size); - actually_written += tape->buffer_size; - if (retval <= 0) - return (retval); - } - if (count) { - actually_written += count; - if (copy_from_user(tape->cur, buf, count)) + + while (done < count) { + size_t todo; + + /* flush if staging buffer is full */ + if (tape->valid == tape->buffer_size && + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, + tape->buffer_size) <= 0) + return rc; + + /* copy in */ + todo = min_t(size_t, count - done, + tape->buffer_size - tape->valid); + if (copy_from_user(tape->cur, buf + done, todo)) ret = -EFAULT; - tape->cur += count; - tape->valid += count; + + tape->cur += todo; + tape->valid += todo; + done += todo; } - return ret ? ret : actually_written; + + return ret ? ret : done; } static int idetape_write_filemark(ide_drive_t *drive) -- cgit v0.10.2 From 29d1a4371035e01b0d079bc5aa88b50f5af7a566 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide-atapi: kill unused fields and callbacks Impact: remove fields and code paths which are no longer necessary Now that ide-tape uses standard mechanisms to transfer data, special case handling for bh handling can be dropped from ide-atapi. Drop the followings. * pc->cur_pos, b_count, bh and b_data * drive->pc_update_buffers() and pc_io_buffers(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index b9dd450..afe5a43 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -359,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->name, rq_data_dir(pc->rq) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; - } else { + } else pc->xferred = pc->req_xfer; - if (drive->pc_update_buffers) - drive->pc_update_buffers(drive, pc); - } debug_log("%s: DMA finished\n", drive->name); } @@ -463,16 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - if (drive->media == ide_tape && pc->bh) - done = drive->pc_io_buffers(drive, pc, bcount, write); - else { - done = min_t(unsigned int, bcount, cmd->nleft); - ide_pio_bytes(drive, cmd, write, done); - } + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); - /* Update the current position */ + /* Update transferred byte count */ pc->xferred += done; - pc->cur_pos += done; bcount -= done; @@ -650,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) /* We haven't transferred any data yet */ pc->xferred = 0; - pc->cur_pos = pc->buf; valid_tf = IDE_VALID_DEVICE; bcount = ((drive->media == ide_tape) ? diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 2599579..8dfc688 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -591,7 +591,6 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = NULL; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; diff --git a/include/linux/ide.h b/include/linux/ide.h index 1957461..34c128f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -362,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -379,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -595,10 +587,6 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; -- cgit v0.10.2 From 5ad960fe8d0e4f99fe2b8dded45e8251137293c9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 19 Apr 2009 08:46:03 +0900 Subject: ide: drop rq->data handling from ide_map_sg() Impact: remove code path which is no longer necessary All IDE data transfers now use rq->bio. Simplify ide_map_sg() accordingly. Signed-off-by: Tejun Heo Cc: Jens Axboe diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 6e3094e..a0309ea 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -248,11 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) struct scatterlist *sg = hwif->sg_table; struct request *rq = cmd->rq; - if (!rq->bio) { - sg_init_one(sg, rq->data, rq->data_len); - cmd->sg_nents = 1; - } else - cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); + cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); } EXPORT_SYMBOL_GPL(ide_map_sg); -- cgit v0.10.2 From 586cf2681f527ce8b85b9bd57c8b9f7945fbe051 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Apr 2009 12:16:56 +0900 Subject: ide-dma: don't reset request fields on dma_timeout_retry() Impact: drop unnecessary code Now that everything uses bio and block operations, there is no need to reset request fields manually when retrying a request. Every field is guaranteed to be always valid. Drop unnecessary request field resetting from ide_dma_timeout_retry(). Signed-off-by: Tejun Heo diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index a0b8cab1..d9123ec 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) /* * un-busy drive etc and make sure request is sane */ - rq = hwif->rq; - if (!rq) - goto out; - - hwif->rq = NULL; - - rq->errors = 0; - - if (!rq->bio) - goto out; - - rq->sector = rq->bio->bi_sector; - rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9; - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->buffer = bio_data(rq->bio); -out: + if (rq) { + hwif->rq = NULL; + rq->errors = 0; + } return ret; } -- cgit v0.10.2 From db29a6b49674085f136331014ba0eee249c16a2c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 21 Apr 2009 09:27:03 +0200 Subject: block: enable by default support for large devices and files on 32-bit archs Enable by default support for large devices and files (CONFIG_LBD): - With 1TB disks being a commodity hardware it is quite easy to hit 2TB limitation while building RAIDs etc. and many distros have been using CONFIG_LBD=y by default already (at least Fedora 10 and openSUSE 11.1). - This should also prevent a subtle ext4 filesystem compatibility issue: mke2fs.ext4 defaults to creating filesystems with huge_files feature enabled and such filesystems cannot be later mounted read-write on machines with CONFIG_LBD=n (it should be quite easy to hit this issue when trying to use filesystem created using distro kernel on system running the self-build kernel, think about USB disk enclosures & co.). While at it: - Clarify config option help text w.r.t. mounting ext4 filesystems (they can be mounted with CONFIG_LBD=n but in the read-only mode). Cc: "Theodore Ts'o" Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Jens Axboe diff --git a/block/Kconfig b/block/Kconfig index e7d1278..2c39527 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -26,6 +26,7 @@ if BLOCK config LBD bool "Support for large block devices and files" depends on !64BIT + default y help Enable block devices or files of size 2TB and larger. @@ -38,11 +39,13 @@ config LBD The ext4 filesystem requires that this feature be enabled in order to support filesystems that have the huge_file feature - enabled. Otherwise, it will refuse to mount any filesystems - that use the huge_file feature, which is enabled by default - by mke2fs.ext4. The GFS2 filesystem also requires this feature. + enabled. Otherwise, it will refuse to mount in the read-write + mode any filesystems that use the huge_file feature, which is + enabled by default by mke2fs.ext4. - If unsure, say N. + The GFS2 filesystem also requires this feature. + + If unsure, say Y. config BLK_DEV_BSG bool "Block layer SG support v4 (EXPERIMENTAL)" -- cgit v0.10.2 From db2dbb12dc47a50c7a4c5678f526014063e486f6 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Wed, 22 Apr 2009 14:08:13 +0200 Subject: block: implement blkdev_readpages Doing a proper block dev ->readpages() speeds up the crazy dump(8) approach of using interleaved process IO. Signed-off-by: Jeff Moyer Signed-off-by: Jens Axboe diff --git a/fs/block_dev.c b/fs/block_dev.c index f45dbc1..a85fe31 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -331,6 +331,12 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } +static int blkdev_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); +} + static int blkdev_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1399,6 +1405,7 @@ static int blkdev_releasepage(struct page *page, gfp_t wait) static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, + .readpages = blkdev_readpages, .writepage = blkdev_writepage, .sync_page = block_sync_page, .write_begin = blkdev_write_begin, -- cgit v0.10.2 From a538cd03be6f363d039daa94199c28cfbd508455 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:17 +0900 Subject: block: merge blk_invoke_request_fn() into __blk_run_queue() __blk_run_queue wraps blk_invoke_request_fn() such that it additionally removes plug and bails out early if the queue is empty. Both extra operations have their own pending mechanisms and don't cause any harm correctness-wise when they are done superflously. The only user of blk_invoke_request_fn() being blk_start_queue(), there isn't much reason to keep both functions around. Merge blk_invoke_request_fn() into __blk_run_queue() and make blk_start_queue() use __blk_run_queue() instead. [ Impact: merge two subtly different internal functions ] Signed-off-by: Tejun Heo diff --git a/block/blk-core.c b/block/blk-core.c index 41bc0ff..02f53bc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -333,24 +333,6 @@ void blk_unplug(struct request_queue *q) } EXPORT_SYMBOL(blk_unplug); -static void blk_invoke_request_fn(struct request_queue *q) -{ - if (unlikely(blk_queue_stopped(q))) - return; - - /* - * one level of recursion is ok and is much faster than kicking - * the unplug handling - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else { - queue_flag_set(QUEUE_FLAG_PLUGGED, q); - kblockd_schedule_work(q, &q->unplug_work); - } -} - /** * blk_start_queue - restart a previously stopped queue * @q: The &struct request_queue in question @@ -365,7 +347,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - blk_invoke_request_fn(q); + __blk_run_queue(q); } EXPORT_SYMBOL(blk_start_queue); @@ -425,12 +407,23 @@ void __blk_run_queue(struct request_queue *q) { blk_remove_plug(q); + if (unlikely(blk_queue_stopped(q))) + return; + + if (elv_queue_empty(q)) + return; + /* * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!elv_queue_empty(q)) - blk_invoke_request_fn(q); + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + q->request_fn(q); + queue_flag_clear(QUEUE_FLAG_REENTER, q); + } else { + queue_flag_set(QUEUE_FLAG_PLUGGED, q); + kblockd_schedule_work(q, &q->unplug_work); + } } EXPORT_SYMBOL(__blk_run_queue); -- cgit v0.10.2 From a7f557923441186a3cdbabc54f1bcacf42b63bf5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:17 +0900 Subject: block: kill blk_start_queueing() blk_start_queueing() is identical to __blk_run_queue() except that it doesn't check for recursion. None of the current users depends on blk_start_queueing() running request_fn directly. Replace usages of blk_start_queueing() with [__]blk_run_queue() and kill it. [ Impact: removal of mostly duplicate interface function ] Signed-off-by: Tejun Heo diff --git a/block/as-iosched.c b/block/as-iosched.c index c48fa67..45bd070 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1312,12 +1312,8 @@ static void as_merged_requests(struct request_queue *q, struct request *req, static void as_work_handler(struct work_struct *work) { struct as_data *ad = container_of(work, struct as_data, antic_work); - struct request_queue *q = ad->q; - unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queueing(q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_run_queue(ad->q); } static int as_may_queue(struct request_queue *q, int rw) diff --git a/block/blk-core.c b/block/blk-core.c index 02f53bc..8b4a0af 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -433,9 +433,7 @@ EXPORT_SYMBOL(__blk_run_queue); * * Description: * Invoke request handling on this queue, if it has pending work to do. - * May be used to restart queueing when a request has completed. Also - * See @blk_start_queueing. - * + * May be used to restart queueing when a request has completed. */ void blk_run_queue(struct request_queue *q) { @@ -895,28 +893,6 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) EXPORT_SYMBOL(blk_get_request); /** - * blk_start_queueing - initiate dispatch of requests to device - * @q: request queue to kick into gear - * - * This is basically a helper to remove the need to know whether a queue - * is plugged or not if someone just wants to initiate dispatch of requests - * for this queue. Should be used to start queueing on a device outside - * of ->request_fn() context. Also see @blk_run_queue. - * - * The queue lock must be held with interrupts disabled. - */ -void blk_start_queueing(struct request_queue *q) -{ - if (!blk_queue_plugged(q)) { - if (unlikely(blk_queue_stopped(q))) - return; - q->request_fn(q); - } else - __generic_unplug_device(q); -} -EXPORT_SYMBOL(blk_start_queueing); - -/** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted * @rq: request to be inserted @@ -984,7 +960,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, drive_stat_acct(rq, 1); __elv_add_request(q, rq, where, 0); - blk_start_queueing(q); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a55a9bd..def0c69 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2088,7 +2088,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || cfqd->busy_queues > 1) { del_timer(&cfqd->idle_slice_timer); - blk_start_queueing(cfqd->queue); + __blk_run_queue(cfqd->queue); } cfq_mark_cfqq_must_dispatch(cfqq); } @@ -2100,7 +2100,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - blk_start_queueing(cfqd->queue); + __blk_run_queue(cfqd->queue); } } @@ -2345,7 +2345,7 @@ static void cfq_kick_queue(struct work_struct *work) struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); - blk_start_queueing(q); + __blk_run_queue(cfqd->queue); spin_unlock_irq(q->queue_lock); } diff --git a/block/elevator.c b/block/elevator.c index 7073a90..2e0fb21 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -599,7 +599,7 @@ void elv_quiesce_start(struct request_queue *q) */ elv_drain_elevator(q); while (q->rq.elvpriv) { - blk_start_queueing(q); + __blk_run_queue(q); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); @@ -643,8 +643,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) * with anything. There's no point in delaying queue * processing. */ - blk_remove_plug(q); - blk_start_queueing(q); + __blk_run_queue(q); break; case ELEVATOR_INSERT_SORT: @@ -971,7 +970,7 @@ void elv_completed_request(struct request_queue *q, struct request *rq) blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - blk_start_queueing(q); + __blk_run_queue(q); } } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2755d5c..12e20de 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -797,7 +797,6 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); -extern void blk_start_queueing(struct request_queue *); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v0.10.2 From e4025f6c21f1389696c069be2dc647f364925c45 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:17 +0900 Subject: block: don't set REQ_NOMERGE unnecessarily RQ_NOMERGE_FLAGS already clears defines which REQ flags aren't mergeable. There is no reason to specify it superflously. It only adds to confusion. Don't set REQ_NOMERGE for barriers and requests with specific queueing directive. REQ_NOMERGE is now exclusively used by the merging code. [ Impact: cleanup ] Signed-off-by: Tejun Heo diff --git a/block/blk-core.c b/block/blk-core.c index 8b4a0af..7e0fab5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1082,16 +1082,13 @@ void init_request_from_bio(struct request *req, struct bio *bio) if (bio_failfast_driver(bio)) req->cmd_flags |= REQ_FAILFAST_DRIVER; - /* - * REQ_BARRIER implies no merging, but lets make it explicit - */ if (unlikely(bio_discard(bio))) { req->cmd_flags |= REQ_DISCARD; if (bio_barrier(bio)) req->cmd_flags |= REQ_SOFTBARRIER; req->q->prepare_discard_fn(req->q, req); } else if (unlikely(bio_barrier(bio))) - req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + req->cmd_flags |= REQ_HARDBARRIER; if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; diff --git a/block/blk-exec.c b/block/blk-exec.c index 6af716d..49557e9 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -51,7 +51,6 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; rq->rq_disk = bd_disk; - rq->cmd_flags |= REQ_NOMERGE; rq->end_io = done; WARN_ON(irqs_disabled()); spin_lock_irq(q->queue_lock); -- cgit v0.10.2 From 10732f5661fb7adf62e20733b0030fc0fc93b0c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: cleanup REQ_SOFTBARRIER usages blk_insert_request() doesn't need to worry about REQ_SOFTBARRIER. Don't set it. Combined with recent ide updates, REQ_SOFTBARRIER is now only used in elevator proper and for discard requests. [ Impact: cleanup ] Signed-off-by: Tejun Heo diff --git a/block/blk-core.c b/block/blk-core.c index 7e0fab5..cf10dfc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -946,7 +946,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, * barrier */ rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = data; -- cgit v0.10.2 From 2eef33e439ba9ae387cdc3f1abcef2f3f6c4e7a8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: clean up misc stuff after block layer timeout conversion * In blk_rq_timed_out_timer(), else { if } to else if * In blk_add_timer(), simplify if/else block [ Impact: cleanup ] Signed-off-by: Tejun Heo diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 1ec0d50..1ba7e0a 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -122,10 +122,8 @@ void blk_rq_timed_out_timer(unsigned long data) if (blk_mark_rq_complete(rq)) continue; blk_rq_timed_out(rq); - } else { - if (!next || time_after(next, rq->deadline)) - next = rq->deadline; - } + } else if (!next || time_after(next, rq->deadline)) + next = rq->deadline; } /* @@ -176,16 +174,14 @@ void blk_add_timer(struct request *req) BUG_ON(!list_empty(&req->timeout_list)); BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); - if (req->timeout) - req->deadline = jiffies + req->timeout; - else { - req->deadline = jiffies + q->rq_timeout; - /* - * Some LLDs, like scsi, peek at the timeout to prevent - * a command from being retried forever. - */ + /* + * Some LLDs, like scsi, peek at the timeout to prevent a + * command from being retried forever. + */ + if (!req->timeout) req->timeout = q->rq_timeout; - } + + req->deadline = jiffies + req->timeout; list_add_tail(&req->timeout_list, &q->timeout_list); /* -- cgit v0.10.2 From 5efccd17ceb0fc43837a331297c2c407969d7201 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: reorder request completion functions Reorder request completion functions such that * All request completion functions are located together. * Functions which are used by only one caller is put right above the caller. * end_request() is put after other completion functions but before blk_update_request(). This change is for completion function cleanup which will follow. [ Impact: cleanup, code reorganization ] Signed-off-by: Tejun Heo diff --git a/block/blk-core.c b/block/blk-core.c index cf10dfc..406a93e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1684,6 +1684,35 @@ static void blk_account_io_done(struct request *req) } /** + * blk_rq_bytes - Returns bytes left to complete in the entire request + * @rq: the request being processed + **/ +unsigned int blk_rq_bytes(struct request *rq) +{ + if (blk_fs_request(rq)) + return rq->hard_nr_sectors << 9; + + return rq->data_len; +} +EXPORT_SYMBOL_GPL(blk_rq_bytes); + +/** + * blk_rq_cur_bytes - Returns bytes left to complete in the current segment + * @rq: the request being processed + **/ +unsigned int blk_rq_cur_bytes(struct request *rq) +{ + if (blk_fs_request(rq)) + return rq->current_nr_sectors << 9; + + if (rq->bio) + return rq->bio->bi_size; + + return rq->data_len; +} +EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); + +/** * __end_that_request_first - end I/O on a request * @req: the request being processed * @error: %0 for success, < %0 for error @@ -1797,6 +1826,22 @@ static int __end_that_request_first(struct request *req, int error, return 1; } +static int end_that_request_data(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) +{ + if (rq->bio) { + if (__end_that_request_first(rq, error, nr_bytes)) + return 1; + + /* Bidi request must be completed as a whole */ + if (blk_bidi_rq(rq) && + __end_that_request_first(rq->next_rq, error, bidi_bytes)) + return 1; + } + + return 0; +} + /* * queue lock must be held */ @@ -1826,78 +1871,6 @@ static void end_that_request_last(struct request *req, int error) } /** - * blk_rq_bytes - Returns bytes left to complete in the entire request - * @rq: the request being processed - **/ -unsigned int blk_rq_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return rq->hard_nr_sectors << 9; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_bytes); - -/** - * blk_rq_cur_bytes - Returns bytes left to complete in the current segment - * @rq: the request being processed - **/ -unsigned int blk_rq_cur_bytes(struct request *rq) -{ - if (blk_fs_request(rq)) - return rq->current_nr_sectors << 9; - - if (rq->bio) - return rq->bio->bi_size; - - return rq->data_len; -} -EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); - -/** - * end_request - end I/O on the current segment of the request - * @req: the request being processed - * @uptodate: error value or %0/%1 uptodate flag - * - * Description: - * Ends I/O on the current segment of a request. If that is the only - * remaining segment, the request is also completed and freed. - * - * This is a remnant of how older block drivers handled I/O completions. - * Modern drivers typically end I/O on the full request in one go, unless - * they have a residual value to account for. For that case this function - * isn't really useful, unless the residual just happens to be the - * full current segment. In other words, don't use this function in new - * code. Use blk_end_request() or __blk_end_request() to end a request. - **/ -void end_request(struct request *req, int uptodate) -{ - int error = 0; - - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; - - __blk_end_request(req, error, req->hard_cur_sectors << 9); -} -EXPORT_SYMBOL(end_request); - -static int end_that_request_data(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes) -{ - if (rq->bio) { - if (__end_that_request_first(rq, error, nr_bytes)) - return 1; - - /* Bidi request must be completed as a whole */ - if (blk_bidi_rq(rq) && - __end_that_request_first(rq->next_rq, error, bidi_bytes)) - return 1; - } - - return 0; -} - -/** * blk_end_io - Generic end_io function to complete a request. * @rq: the request being processed * @error: %0 for success, < %0 for error @@ -2007,6 +1980,33 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, EXPORT_SYMBOL_GPL(blk_end_bidi_request); /** + * end_request - end I/O on the current segment of the request + * @req: the request being processed + * @uptodate: error value or %0/%1 uptodate flag + * + * Description: + * Ends I/O on the current segment of a request. If that is the only + * remaining segment, the request is also completed and freed. + * + * This is a remnant of how older block drivers handled I/O completions. + * Modern drivers typically end I/O on the full request in one go, unless + * they have a residual value to account for. For that case this function + * isn't really useful, unless the residual just happens to be the + * full current segment. In other words, don't use this function in new + * code. Use blk_end_request() or __blk_end_request() to end a request. + **/ +void end_request(struct request *req, int uptodate) +{ + int error = 0; + + if (uptodate <= 0) + error = uptodate ? uptodate : -EIO; + + __blk_end_request(req, error, req->hard_cur_sectors << 9); +} +EXPORT_SYMBOL(end_request); + +/** * blk_update_request - Special helper function for request stacking drivers * @rq: the request being processed * @error: %0 for success, < %0 for error diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12e20de..156ffd9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -832,6 +832,14 @@ static inline void blk_run_address_space(struct address_space *mapping) extern void blkdev_dequeue_request(struct request *req); /* + * blk_end_request() takes bytes instead of sectors as a complete size. + * blk_rq_bytes() returns bytes left to complete in the entire request. + * blk_rq_cur_bytes() returns bytes left to complete in the current segment. + */ +extern unsigned int blk_rq_bytes(struct request *rq); +extern unsigned int blk_rq_cur_bytes(struct request *rq); + +/* * blk_end_request() and friends. * __blk_end_request() and end_request() must be called with * the request queue spinlock acquired. @@ -858,14 +866,6 @@ extern void blk_update_request(struct request *rq, int error, unsigned int nr_bytes); /* - * blk_end_request() takes bytes instead of sectors as a complete size. - * blk_rq_bytes() returns bytes left to complete in the entire request. - * blk_rq_cur_bytes() returns bytes left to complete in the current segment. - */ -extern unsigned int blk_rq_bytes(struct request *rq); -extern unsigned int blk_rq_cur_bytes(struct request *rq); - -/* * Access functions for manipulating queue properties */ extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, -- cgit v0.10.2 From 158dbda0068e63c7cce7bd47c123bd1dfa5a902c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: reorganize request fetching functions Impact: code reorganization elv_next_request() and elv_dequeue_request() are public block layer interface than actual elevator implementation. They mostly deal with how requests interact with block layer and low level drivers at the beginning of rqeuest processing whereas __elv_next_request() is the actual eleveator request fetching interface. Move the two functions to blk-core.c. This prepares for further interface cleanup. Signed-off-by: Tejun Heo diff --git a/block/blk-core.c b/block/blk-core.c index 406a93e..678ede2 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1712,6 +1712,101 @@ unsigned int blk_rq_cur_bytes(struct request *rq) } EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); +struct request *elv_next_request(struct request_queue *q) +{ + struct request *rq; + int ret; + + while ((rq = __elv_next_request(q)) != NULL) { + if (!(rq->cmd_flags & REQ_STARTED)) { + /* + * This is the first time the device driver + * sees this request (possibly after + * requeueing). Notify IO scheduler. + */ + if (blk_sorted_rq(rq)) + elv_activate_rq(q, rq); + + /* + * just mark as started even if we don't start + * it, a request that has been delayed should + * not be passed by new incoming requests + */ + rq->cmd_flags |= REQ_STARTED; + trace_block_rq_issue(q, rq); + } + + if (!q->boundary_rq || q->boundary_rq == rq) { + q->end_sector = rq_end_sector(rq); + q->boundary_rq = NULL; + } + + if (rq->cmd_flags & REQ_DONTPREP) + break; + + if (q->dma_drain_size && rq->data_len) { + /* + * make sure space for the drain appears we + * know we can do this because max_hw_segments + * has been adjusted to be one fewer than the + * device can handle + */ + rq->nr_phys_segments++; + } + + if (!q->prep_rq_fn) + break; + + ret = q->prep_rq_fn(q, rq); + if (ret == BLKPREP_OK) { + break; + } else if (ret == BLKPREP_DEFER) { + /* + * the request may have been (partially) prepped. + * we need to keep this request in the front to + * avoid resource deadlock. REQ_STARTED will + * prevent other fs requests from passing this one. + */ + if (q->dma_drain_size && rq->data_len && + !(rq->cmd_flags & REQ_DONTPREP)) { + /* + * remove the space for the drain we added + * so that we don't add it again + */ + --rq->nr_phys_segments; + } + + rq = NULL; + break; + } else if (ret == BLKPREP_KILL) { + rq->cmd_flags |= REQ_QUIET; + __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); + } else { + printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); + break; + } + } + + return rq; +} +EXPORT_SYMBOL(elv_next_request); + +void elv_dequeue_request(struct request_queue *q, struct request *rq) +{ + BUG_ON(list_empty(&rq->queuelist)); + BUG_ON(ELV_ON_HASH(rq)); + + list_del_init(&rq->queuelist); + + /* + * the time frame between a request being removed from the lists + * and to it is freed is accounted as io that is in progress at + * the driver side. + */ + if (blk_account_rq(rq)) + q->in_flight++; +} + /** * __end_that_request_first - end I/O on a request * @req: the request being processed diff --git a/block/blk.h b/block/blk.h index 79c85f7..9b2c324 100644 --- a/block/blk.h +++ b/block/blk.h @@ -43,6 +43,43 @@ static inline void blk_clear_rq_complete(struct request *rq) clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); } +/* + * Internal elevator interface + */ +#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) + +static inline struct request *__elv_next_request(struct request_queue *q) +{ + struct request *rq; + + while (1) { + while (!list_empty(&q->queue_head)) { + rq = list_entry_rq(q->queue_head.next); + if (blk_do_ordered(q, &rq)) + return rq; + } + + if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) + return NULL; + } +} + +static inline void elv_activate_rq(struct request_queue *q, struct request *rq) +{ + struct elevator_queue *e = q->elevator; + + if (e->ops->elevator_activate_req_fn) + e->ops->elevator_activate_req_fn(q, rq); +} + +static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq) +{ + struct elevator_queue *e = q->elevator; + + if (e->ops->elevator_deactivate_req_fn) + e->ops->elevator_deactivate_req_fn(q, rq); +} + #ifdef CONFIG_FAIL_IO_TIMEOUT int blk_should_fake_timeout(struct request_queue *); ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); diff --git a/block/elevator.c b/block/elevator.c index 2e0fb21..b03b875 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -53,7 +53,6 @@ static const int elv_hash_shift = 6; (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) #define ELV_HASH_ENTRIES (1 << elv_hash_shift) #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) -#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) DEFINE_TRACE(block_rq_insert); DEFINE_TRACE(block_rq_issue); @@ -310,22 +309,6 @@ void elevator_exit(struct elevator_queue *e) } EXPORT_SYMBOL(elevator_exit); -static void elv_activate_rq(struct request_queue *q, struct request *rq) -{ - struct elevator_queue *e = q->elevator; - - if (e->ops->elevator_activate_req_fn) - e->ops->elevator_activate_req_fn(q, rq); -} - -static void elv_deactivate_rq(struct request_queue *q, struct request *rq) -{ - struct elevator_queue *e = q->elevator; - - if (e->ops->elevator_deactivate_req_fn) - e->ops->elevator_deactivate_req_fn(q, rq); -} - static inline void __elv_rqhash_del(struct request *rq) { hlist_del_init(&rq->hash); @@ -758,117 +741,6 @@ void elv_add_request(struct request_queue *q, struct request *rq, int where, } EXPORT_SYMBOL(elv_add_request); -static inline struct request *__elv_next_request(struct request_queue *q) -{ - struct request *rq; - - while (1) { - while (!list_empty(&q->queue_head)) { - rq = list_entry_rq(q->queue_head.next); - if (blk_do_ordered(q, &rq)) - return rq; - } - - if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) - return NULL; - } -} - -struct request *elv_next_request(struct request_queue *q) -{ - struct request *rq; - int ret; - - while ((rq = __elv_next_request(q)) != NULL) { - if (!(rq->cmd_flags & REQ_STARTED)) { - /* - * This is the first time the device driver - * sees this request (possibly after - * requeueing). Notify IO scheduler. - */ - if (blk_sorted_rq(rq)) - elv_activate_rq(q, rq); - - /* - * just mark as started even if we don't start - * it, a request that has been delayed should - * not be passed by new incoming requests - */ - rq->cmd_flags |= REQ_STARTED; - trace_block_rq_issue(q, rq); - } - - if (!q->boundary_rq || q->boundary_rq == rq) { - q->end_sector = rq_end_sector(rq); - q->boundary_rq = NULL; - } - - if (rq->cmd_flags & REQ_DONTPREP) - break; - - if (q->dma_drain_size && rq->data_len) { - /* - * make sure space for the drain appears we - * know we can do this because max_hw_segments - * has been adjusted to be one fewer than the - * device can handle - */ - rq->nr_phys_segments++; - } - - if (!q->prep_rq_fn) - break; - - ret = q->prep_rq_fn(q, rq); - if (ret == BLKPREP_OK) { - break; - } else if (ret == BLKPREP_DEFER) { - /* - * the request may have been (partially) prepped. - * we need to keep this request in the front to - * avoid resource deadlock. REQ_STARTED will - * prevent other fs requests from passing this one. - */ - if (q->dma_drain_size && rq->data_len && - !(rq->cmd_flags & REQ_DONTPREP)) { - /* - * remove the space for the drain we added - * so that we don't add it again - */ - --rq->nr_phys_segments; - } - - rq = NULL; - break; - } else if (ret == BLKPREP_KILL) { - rq->cmd_flags |= REQ_QUIET; - __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); - } else { - printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); - break; - } - } - - return rq; -} -EXPORT_SYMBOL(elv_next_request); - -void elv_dequeue_request(struct request_queue *q, struct request *rq) -{ - BUG_ON(list_empty(&rq->queuelist)); - BUG_ON(ELV_ON_HASH(rq)); - - list_del_init(&rq->queuelist); - - /* - * the time frame between a request being removed from the lists - * and to it is freed is accounted as io that is in progress at - * the driver side. - */ - if (blk_account_rq(rq)) - q->in_flight++; -} - int elv_queue_empty(struct request_queue *q) { struct elevator_queue *e = q->elevator; -- cgit v0.10.2 From 0b302d5aa7975006fa2ec3d66386610b9b36c669 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: kill blk_end_request_callback() With recent IDE updates, blk_end_request_callback() doesn't have any user now. Kill it. [ Impact: removal of unused convoluted interface ] Signed-off-by: Tejun Heo diff --git a/block/blk-core.c b/block/blk-core.c index 678ede2..2f277ea 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1971,10 +1971,6 @@ static void end_that_request_last(struct request *req, int error) * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq - * @drv_callback: function called between completion of bios in the request - * and completion of the request. - * If the callback returns non %0, this helper returns without - * completion of the request. * * Description: * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. @@ -1985,8 +1981,7 @@ static void end_that_request_last(struct request *req, int error) * %1 - this request is not freed yet, it still has pending buffers. **/ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, - unsigned int bidi_bytes, - int (drv_callback)(struct request *)) + unsigned int bidi_bytes) { struct request_queue *q = rq->q; unsigned long flags = 0UL; @@ -1994,10 +1989,6 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) return 1; - /* Special feature for tricky drivers */ - if (drv_callback && drv_callback(rq)) - return 1; - add_disk_randomness(rq->rq_disk); spin_lock_irqsave(q->queue_lock, flags); @@ -2023,7 +2014,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, **/ int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { - return blk_end_io(rq, error, nr_bytes, 0, NULL); + return blk_end_io(rq, error, nr_bytes, 0); } EXPORT_SYMBOL_GPL(blk_end_request); @@ -2070,7 +2061,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request); int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) { - return blk_end_io(rq, error, nr_bytes, bidi_bytes, NULL); + return blk_end_io(rq, error, nr_bytes, bidi_bytes); } EXPORT_SYMBOL_GPL(blk_end_bidi_request); @@ -2131,39 +2122,6 @@ void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) } EXPORT_SYMBOL_GPL(blk_update_request); -/** - * blk_end_request_callback - Special helper function for tricky drivers - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * @drv_callback: function called between completion of bios in the request - * and completion of the request. - * If the callback returns non %0, this helper returns without - * completion of the request. - * - * Description: - * Ends I/O on a number of bytes attached to @rq. - * If @rq has leftover, sets it up for the next range of segments. - * - * This special helper function is used only for existing tricky drivers. - * (e.g. cdrom_newpc_intr() of ide-cd) - * This interface will be removed when such drivers are rewritten. - * Don't use this interface in other places anymore. - * - * Return: - * %0 - we are done with this request - * %1 - this request is not freed yet. - * this request still has pending buffers or - * the driver doesn't want to finish this request yet. - **/ -int blk_end_request_callback(struct request *rq, int error, - unsigned int nr_bytes, - int (drv_callback)(struct request *)) -{ - return blk_end_io(rq, error, nr_bytes, 0, drv_callback); -} -EXPORT_SYMBOL_GPL(blk_end_request_callback); - void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 156ffd9..1fa9dcf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -855,9 +855,6 @@ extern int __blk_end_request(struct request *rq, int error, extern int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); extern void end_request(struct request *, int); -extern int blk_end_request_callback(struct request *rq, int error, - unsigned int nr_bytes, - int (drv_callback)(struct request *)); extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -- cgit v0.10.2 From 2e60e02297cf54e367567f2d85b2ca56b1c4a906 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: clean up request completion API Request completion has gone through several changes and became a bit messy over the time. Clean it up. 1. end_that_request_data() is a thin wrapper around end_that_request_data_first() which checks whether bio is NULL before doing anything and handles bidi completion. blk_update_request() is a thin wrapper around end_that_request_data() which clears nr_sectors on the last iteration but doesn't use the bidi completion. Clean it up by moving the initial bio NULL check and nr_sectors clearing on the last iteration into end_that_request_data() and renaming it to blk_update_request(), which makes blk_end_io() the only user of end_that_request_data(). Collapse end_that_request_data() into blk_end_io(). 2. There are four visible completion variants - blk_end_request(), __blk_end_request(), blk_end_bidi_request() and end_request(). blk_end_request() and blk_end_bidi_request() uses blk_end_request() as the backend but __blk_end_request() and end_request() use separate implementation in __blk_end_request() due to different locking rules. blk_end_bidi_request() is identical to blk_end_io(). Collapse blk_end_io() into blk_end_bidi_request(), separate out request update into internal helper blk_update_bidi_request() and add __blk_end_bidi_request(). Redefine [__]blk_end_request() as thin inline wrappers around [__]blk_end_bidi_request(). 3. As the whole request issue/completion usages are about to be modified and audited, it's a good chance to convert completion functions return bool which better indicates the intended meaning of return values. 4. The function name end_that_request_last() is from the days when it was a public interface and slighly confusing. Give it a proper internal name - blk_finish_request(). 5. Add description explaning that blk_end_bidi_request() can be safely used for uni requests as suggested by Boaz Harrosh. The only visible behavior change is from #1. nr_sectors counts are cleared after the final iteration no matter which function is used to complete the request. I couldn't find any place where the code assumes those nr_sectors counters contain the values for the last segment and this change is good as it makes the API much more consistent as the end result is now same whether a request is completed using [__]blk_end_request() alone or in combination with blk_update_request(). API further cleaned up per Christoph's suggestion. [ Impact: cleanup, rq->*nr_sectors always updated after req completion ] Signed-off-by: Tejun Heo Reviewed-by: Boaz Harrosh Cc: Christoph Hellwig diff --git a/block/blk-core.c b/block/blk-core.c index 2f277ea..89cc05d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1808,25 +1808,35 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq) } /** - * __end_that_request_first - end I/O on a request - * @req: the request being processed + * blk_update_request - Special helper function for request stacking drivers + * @rq: the request being processed * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete + * @nr_bytes: number of bytes to complete @rq * * Description: - * Ends I/O on a number of bytes attached to @req, and sets it up - * for the next range of segments (if any) in the cluster. + * Ends I/O on a number of bytes attached to @rq, but doesn't complete + * the request structure even if @rq doesn't have leftover. + * If @rq has leftover, sets it up for the next range of segments. + * + * This special helper function is only for request stacking drivers + * (e.g. request-based dm) so that they can handle partial completion. + * Actual device drivers should use blk_end_request instead. + * + * Passing the result of blk_rq_bytes() as @nr_bytes guarantees + * %false return from this function. * * Return: - * %0 - we are done with this request, call end_that_request_last() - * %1 - still buffers pending for this request + * %false - this request doesn't have any more data + * %true - this request has more data **/ -static int __end_that_request_first(struct request *req, int error, - int nr_bytes) +bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) { int total_bytes, bio_nbytes, next_idx = 0; struct bio *bio; + if (!req->bio) + return false; + trace_block_rq_complete(req->q, req); /* @@ -1903,8 +1913,16 @@ static int __end_that_request_first(struct request *req, int error, /* * completely done */ - if (!req->bio) - return 0; + if (!req->bio) { + /* + * Reset counters so that the request stacking driver + * can find how many bytes remain in the request + * later. + */ + req->nr_sectors = req->hard_nr_sectors = 0; + req->current_nr_sectors = req->hard_cur_sectors = 0; + return false; + } /* * if the request wasn't completed, update state @@ -1918,29 +1936,31 @@ static int __end_that_request_first(struct request *req, int error, blk_recalc_rq_sectors(req, total_bytes >> 9); blk_recalc_rq_segments(req); - return 1; + return true; } +EXPORT_SYMBOL_GPL(blk_update_request); -static int end_that_request_data(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes) +static bool blk_update_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes) { - if (rq->bio) { - if (__end_that_request_first(rq, error, nr_bytes)) - return 1; + if (blk_update_request(rq, error, nr_bytes)) + return true; - /* Bidi request must be completed as a whole */ - if (blk_bidi_rq(rq) && - __end_that_request_first(rq->next_rq, error, bidi_bytes)) - return 1; - } + /* Bidi request must be completed as a whole */ + if (unlikely(blk_bidi_rq(rq)) && + blk_update_request(rq->next_rq, error, bidi_bytes)) + return true; - return 0; + add_disk_randomness(rq->rq_disk); + + return false; } /* * queue lock must be held */ -static void end_that_request_last(struct request *req, int error) +static void blk_finish_request(struct request *req, int error) { if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1966,161 +1986,65 @@ static void end_that_request_last(struct request *req, int error) } /** - * blk_end_io - Generic end_io function to complete a request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete @rq - * @bidi_bytes: number of bytes to complete @rq->next_rq + * blk_end_bidi_request - Complete a bidi request + * @rq: the request to complete + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete @rq + * @bidi_bytes: number of bytes to complete @rq->next_rq * * Description: * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. - * If @rq has leftover, sets it up for the next range of segments. + * Drivers that supports bidi can safely call this member for any + * type of request, bidi or uni. In the later case @bidi_bytes is + * just ignored. * * Return: - * %0 - we are done with this request - * %1 - this request is not freed yet, it still has pending buffers. + * %false - we are done with this request + * %true - still buffers pending for this request **/ -static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, - unsigned int bidi_bytes) +bool blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) { struct request_queue *q = rq->q; - unsigned long flags = 0UL; - - if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) - return 1; + unsigned long flags; - add_disk_randomness(rq->rq_disk); + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) + return true; spin_lock_irqsave(q->queue_lock, flags); - end_that_request_last(rq, error); + blk_finish_request(rq, error); spin_unlock_irqrestore(q->queue_lock, flags); - return 0; -} - -/** - * blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Ends I/O on a number of bytes attached to @rq. - * If @rq has leftover, sets it up for the next range of segments. - * - * Return: - * %0 - we are done with this request - * %1 - still buffers pending for this request - **/ -int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) -{ - return blk_end_io(rq, error, nr_bytes, 0); -} -EXPORT_SYMBOL_GPL(blk_end_request); - -/** - * __blk_end_request - Helper function for drivers to complete the request. - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete - * - * Description: - * Must be called with queue lock held unlike blk_end_request(). - * - * Return: - * %0 - we are done with this request - * %1 - still buffers pending for this request - **/ -int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) -{ - if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) - return 1; - - add_disk_randomness(rq->rq_disk); - - end_that_request_last(rq, error); - - return 0; + return false; } -EXPORT_SYMBOL_GPL(__blk_end_request); +EXPORT_SYMBOL_GPL(blk_end_bidi_request); /** - * blk_end_bidi_request - Helper function for drivers to complete bidi request. - * @rq: the bidi request being processed + * __blk_end_bidi_request - Complete a bidi request with queue lock held + * @rq: the request to complete * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * * Description: - * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. + * Identical to blk_end_bidi_request() except that queue lock is + * assumed to be locked on entry and remains so on return. * * Return: - * %0 - we are done with this request - * %1 - still buffers pending for this request - **/ -int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, - unsigned int bidi_bytes) -{ - return blk_end_io(rq, error, nr_bytes, bidi_bytes); -} -EXPORT_SYMBOL_GPL(blk_end_bidi_request); - -/** - * end_request - end I/O on the current segment of the request - * @req: the request being processed - * @uptodate: error value or %0/%1 uptodate flag - * - * Description: - * Ends I/O on the current segment of a request. If that is the only - * remaining segment, the request is also completed and freed. - * - * This is a remnant of how older block drivers handled I/O completions. - * Modern drivers typically end I/O on the full request in one go, unless - * they have a residual value to account for. For that case this function - * isn't really useful, unless the residual just happens to be the - * full current segment. In other words, don't use this function in new - * code. Use blk_end_request() or __blk_end_request() to end a request. + * %false - we are done with this request + * %true - still buffers pending for this request **/ -void end_request(struct request *req, int uptodate) +bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) { - int error = 0; + if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) + return true; - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; + blk_finish_request(rq, error); - __blk_end_request(req, error, req->hard_cur_sectors << 9); -} -EXPORT_SYMBOL(end_request); - -/** - * blk_update_request - Special helper function for request stacking drivers - * @rq: the request being processed - * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete @rq - * - * Description: - * Ends I/O on a number of bytes attached to @rq, but doesn't complete - * the request structure even if @rq doesn't have leftover. - * If @rq has leftover, sets it up for the next range of segments. - * - * This special helper function is only for request stacking drivers - * (e.g. request-based dm) so that they can handle partial completion. - * Actual device drivers should use blk_end_request instead. - */ -void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) -{ - if (!end_that_request_data(rq, error, nr_bytes, 0)) { - /* - * These members are not updated in end_that_request_data() - * when all bios are completed. - * Update them so that the request stacking driver can find - * how many bytes remain in the request later. - */ - rq->nr_sectors = rq->hard_nr_sectors = 0; - rq->current_nr_sectors = rq->hard_cur_sectors = 0; - } + return false; } -EXPORT_SYMBOL_GPL(blk_update_request); +EXPORT_SYMBOL_GPL(__blk_end_bidi_request); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1fa9dcf..501f684 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -840,27 +840,97 @@ extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); /* - * blk_end_request() and friends. - * __blk_end_request() and end_request() must be called with - * the request queue spinlock acquired. + * Request completion related functions. + * + * blk_update_request() completes given number of bytes and updates + * the request without completing it. + * + * blk_end_request() and friends. __blk_end_request() and + * end_request() must be called with the request queue spinlock + * acquired. * * Several drivers define their own end_request and call * blk_end_request() for parts of the original function. * This prevents code duplication in drivers. */ -extern int blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int __blk_end_request(struct request *rq, int error, - unsigned int nr_bytes); -extern int blk_end_bidi_request(struct request *rq, int error, - unsigned int nr_bytes, unsigned int bidi_bytes); -extern void end_request(struct request *, int); +extern bool blk_update_request(struct request *rq, int error, + unsigned int nr_bytes); +extern bool blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes); +extern bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes); + +/** + * blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Ends I/O on a number of bytes attached to @rq. + * If @rq has leftover, sets it up for the next range of segments. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +static inline bool blk_end_request(struct request *rq, int error, + unsigned int nr_bytes) +{ + return blk_end_bidi_request(rq, error, nr_bytes, 0); +} + +/** + * __blk_end_request - Helper function for drivers to complete the request. + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete + * + * Description: + * Must be called with queue lock held unlike blk_end_request(). + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request + **/ +static inline bool __blk_end_request(struct request *rq, int error, + unsigned int nr_bytes) +{ + return __blk_end_bidi_request(rq, error, nr_bytes, 0); +} + +/** + * end_request - end I/O on the current segment of the request + * @rq: the request being processed + * @uptodate: error value or %0/%1 uptodate flag + * + * Description: + * Ends I/O on the current segment of a request. If that is the only + * remaining segment, the request is also completed and freed. + * + * This is a remnant of how older block drivers handled I/O completions. + * Modern drivers typically end I/O on the full request in one go, unless + * they have a residual value to account for. For that case this function + * isn't really useful, unless the residual just happens to be the + * full current segment. In other words, don't use this function in new + * code. Use blk_end_request() or __blk_end_request() to end a request. + **/ +static inline void end_request(struct request *rq, int uptodate) +{ + int error = 0; + + if (uptodate <= 0) + error = uptodate ? uptodate : -EIO; + + __blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0); +} + extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); -extern void blk_update_request(struct request *rq, int error, - unsigned int nr_bytes); /* * Access functions for manipulating queue properties -- cgit v0.10.2 From b243ddcbe9be146172baa544dadecebf156eda0e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:18 +0900 Subject: block: move rq->start_time initialization to blk_rq_init() rq->start_time was initialized in init_request_from_bio() so special requests didn't have start_time set. This has been okay as start_time has been used only for fs requests; however, there is no indication of this actually is the case or not. Set rq->start_time in blk_rq_init() and guarantee that all initialized rq's have its start_time set. This improves consistency at virtually no cost and future changes will make use of the timestamp for !bio requests. [ Impact: rq->start_time is valid for all requests ] Signed-off-by: Tejun Heo diff --git a/block/blk-core.c b/block/blk-core.c index 89cc05d..b84250d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -134,6 +134,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; rq->ref_count = 1; + rq->start_time = jiffies; } EXPORT_SYMBOL(blk_rq_init); @@ -1099,7 +1100,6 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; req->ioprio = bio_prio(bio); - req->start_time = jiffies; blk_rq_bio_prep(req->q, req, bio); } -- cgit v0.10.2 From 40cbbb781d3eba5d6ac0860db078af490e5c7c6b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:19 +0900 Subject: block: implement and use [__]blk_end_request_all() There are many [__]blk_end_request() call sites which call it with full request length and expect full completion. Many of them ensure that the request actually completes by doing BUG_ON() the return value, which is awkward and error-prone. This patch adds [__]blk_end_request_all() which takes @rq and @error and fully completes the request. BUG_ON() is added to to ensure that this actually happens. Most conversions are simple but there are a few noteworthy ones. * cdrom/viocd: viocd_end_request() replaced with direct calls to __blk_end_request_all(). * s390/block/dasd: dasd_end_request() replaced with direct calls to __blk_end_request_all(). * s390/char/tape_block: tapeblock_end_request() replaced with direct calls to blk_end_request_all(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Russell King Cc: Stephen Rothwell Cc: Mike Miller Cc: Martin Schwidefsky Cc: Jeff Garzik Cc: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Alex Dubov Cc: James Bottomley diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c index 0abfbaa..cf81bad 100644 --- a/arch/arm/plat-omap/mailbox.c +++ b/arch/arm/plat-omap/mailbox.c @@ -192,8 +192,7 @@ static void mbox_tx_work(struct work_struct *work) } spin_lock(q->queue_lock); - if (__blk_end_request(rq, 0, 0)) - BUG(); + __blk_end_request_all(rq, 0); spin_unlock(q->queue_lock); } } @@ -224,10 +223,7 @@ static void mbox_rx_work(struct work_struct *work) break; msg = (mbox_msg_t) rq->data; - - if (blk_end_request(rq, 0, 0)) - BUG(); - + blk_end_request_all(rq, 0); mbox->rxq->callback((void *)msg); } } @@ -337,8 +333,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf) *p = (mbox_msg_t) rq->data; - if (blk_end_request(rq, 0, 0)) - BUG(); + blk_end_request_all(rq, 0); if (unlikely(mbox_seq_test(mbox, *p))) { pr_info("mbox: Illegal seq bit!(%08x) ignored\n", *p); diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 20b4111..c8d0876 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -106,10 +106,7 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) */ q->ordseq = 0; rq = q->orig_bar_rq; - - if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) - BUG(); - + __blk_end_request_all(rq, q->orderr); return true; } @@ -252,9 +249,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) * with prejudice. */ elv_dequeue_request(q, rq); - if (__blk_end_request(rq, -EOPNOTSUPP, - blk_rq_bytes(rq))) - BUG(); + __blk_end_request_all(rq, -EOPNOTSUPP); *rqp = NULL; return false; } diff --git a/block/blk-core.c b/block/blk-core.c index b84250d..0520cc7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1780,7 +1780,7 @@ struct request *elv_next_request(struct request_queue *q) break; } else if (ret == BLKPREP_KILL) { rq->cmd_flags |= REQ_QUIET; - __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); + __blk_end_request_all(rq, -EIO); } else { printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); break; diff --git a/block/elevator.c b/block/elevator.c index b03b875..1af5d9f 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -810,7 +810,7 @@ void elv_abort_queue(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); rq->cmd_flags |= REQ_QUIET; trace_block_rq_abort(q, rq); - __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); + __blk_end_request_all(rq, -EIO); } } EXPORT_SYMBOL(elv_abort_queue); diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index ca268ca..488a8f4 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1024,8 +1024,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) cmd->req.sg[i].size, ddir); DBGPX(printk("Done with %p\n", rq);); - if (__blk_end_request(rq, error, blk_rq_bytes(rq))) - BUG(); + __blk_end_request_all(rq, error); } /* diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index ff0448e..60e85bb 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -749,8 +749,7 @@ static inline void carm_end_request_queued(struct carm_host *host, struct request *req = crq->rq; int rc; - rc = __blk_end_request(req, error, blk_rq_bytes(req)); - assert(rc == 0); + __blk_end_request_all(req, error); rc = carm_put_request(host, crq); assert(rc == 0); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 5d34764..50745e6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -62,7 +62,7 @@ static void blk_done(struct virtqueue *vq) break; } - __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req)); + __blk_end_request_all(vbr->req, error); list_del(&vbr->list); mempool_free(vbr, vblk->pool); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8f90508..cd6cfe3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -551,7 +551,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) for (i = info->ring.rsp_cons; i != rp; i++) { unsigned long id; - int ret; bret = RING_GET_RESPONSE(&info->ring, i); id = bret->id; @@ -578,8 +577,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " "request: %x\n", bret->status); - ret = __blk_end_request(req, error, blk_rq_bytes(req)); - BUG_ON(ret); + __blk_end_request_all(req, error); break; default: BUG(); diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 2eecb77..fee9a9e 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -632,7 +632,7 @@ static void gdrom_readdisk_dma(struct work_struct *work) * before handling ending the request */ spin_lock(&gdrom_lock); list_del_init(&req->queuelist); - __blk_end_request(req, err, blk_rq_bytes(req)); + __blk_end_request_all(req, err); } spin_unlock(&gdrom_lock); kfree(read_command); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 1392935..cc3efa0 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -291,23 +291,6 @@ static int send_request(struct request *req) return 0; } -static void viocd_end_request(struct request *req, int error) -{ - int nsectors = req->hard_nr_sectors; - - /* - * Make sure it's fully ended, and ensure that we process - * at least one sector. - */ - if (blk_pc_request(req)) - nsectors = (req->data_len + 511) >> 9; - if (!nsectors) - nsectors = 1; - - if (__blk_end_request(req, error, nsectors << 9)) - BUG(); -} - static int rwreq; static void do_viocd_request(struct request_queue *q) @@ -316,11 +299,11 @@ static void do_viocd_request(struct request_queue *q) while ((rwreq == 0) && ((req = elv_next_request(q)) != NULL)) { if (!blk_fs_request(req)) - viocd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); else if (send_request(req) < 0) { printk(VIOCD_KERN_WARNING "unable to send message to OS/400!"); - viocd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); } else rwreq++; } @@ -531,9 +514,9 @@ return_complete: "with rc %d:0x%04X: %s\n", req, event->xRc, bevent->sub_result, err->msg); - viocd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); } else - viocd_end_request(req, 0); + __blk_end_request_all(req, 0); /* restart handling of incoming requests */ spin_unlock_irqrestore(&viocd_reqlock, flags); diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index de143de..a416346 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -826,7 +826,7 @@ static void mspro_block_submit_req(struct request_queue *q) if (msb->eject) { while ((req = elv_next_request(q)) != NULL) - __blk_end_request(req, -ENODEV, blk_rq_bytes(req)); + __blk_end_request_all(req, -ENODEV); return; } diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index d181527..fabec95 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1614,15 +1614,6 @@ void dasd_block_clear_timer(struct dasd_block *block) } /* - * posts the buffer_cache about a finalized request - */ -static inline void dasd_end_request(struct request *req, int error) -{ - if (__blk_end_request(req, error, blk_rq_bytes(req))) - BUG(); -} - -/* * Process finished error recovery ccw. */ static inline void __dasd_block_process_erp(struct dasd_block *block, @@ -1676,7 +1667,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "Rejecting write request %p", req); blkdev_dequeue_request(req); - dasd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); continue; } cqr = basedev->discipline->build_cp(basedev, block, req); @@ -1705,7 +1696,7 @@ static void __dasd_process_request_queue(struct dasd_block *block) "on request %p", PTR_ERR(cqr), req); blkdev_dequeue_request(req); - dasd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); continue; } /* @@ -1731,7 +1722,7 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) status = cqr->block->base->discipline->free_cp(cqr, req); if (status <= 0) error = status ? status : -EIO; - dasd_end_request(req, error); + __blk_end_request_all(req, error); } /* @@ -2040,7 +2031,7 @@ static void dasd_flush_request_queue(struct dasd_block *block) spin_lock_irq(&block->request_queue_lock); while ((req = elv_next_request(block->request_queue))) { blkdev_dequeue_request(req); - dasd_end_request(req, -EIO); + __blk_end_request_all(req, -EIO); } spin_unlock_irq(&block->request_queue_lock); } diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index f32e89e..86596d3 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -74,13 +74,6 @@ tapeblock_trigger_requeue(struct tape_device *device) * Post finished request. */ static void -tapeblock_end_request(struct request *req, int error) -{ - if (blk_end_request(req, error, blk_rq_bytes(req))) - BUG(); -} - -static void __tapeblock_end_request(struct tape_request *ccw_req, void *data) { struct tape_device *device; @@ -90,7 +83,7 @@ __tapeblock_end_request(struct tape_request *ccw_req, void *data) device = ccw_req->device; req = (struct request *) data; - tapeblock_end_request(req, (ccw_req->rc == 0) ? 0 : -EIO); + blk_end_request_all(req, (ccw_req->rc == 0) ? 0 : -EIO); if (ccw_req->rc == 0) /* Update position. */ device->blk_data.block_position = @@ -118,7 +111,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req) ccw_req = device->discipline->bread(device, req); if (IS_ERR(ccw_req)) { DBF_EVENT(1, "TBLOCK: bread failed\n"); - tapeblock_end_request(req, -EIO); + blk_end_request_all(req, -EIO); return PTR_ERR(ccw_req); } ccw_req->callback = __tapeblock_end_request; @@ -131,7 +124,7 @@ tapeblock_start_request(struct tape_device *device, struct request *req) * Start/enqueueing failed. No retries in * this case. */ - tapeblock_end_request(req, -EIO); + blk_end_request_all(req, -EIO); device->discipline->free_bread(ccw_req); } @@ -177,7 +170,7 @@ tapeblock_requeue(struct work_struct *work) { DBF_EVENT(1, "TBLOCK: Rejecting write request\n"); blkdev_dequeue_request(req); spin_unlock_irq(&device->blk_data.request_queue_lock); - tapeblock_end_request(req, -EIO); + blk_end_request_all(req, -EIO); spin_lock_irq(&device->blk_data.request_queue_lock); continue; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d1cb64a..756ac7c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -922,7 +922,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) if (driver_byte(result) & DRIVER_SENSE) scsi_print_sense("", cmd); } - blk_end_request(req, -EIO, blk_rq_bytes(req)); + blk_end_request_all(req, -EIO); scsi_next_command(cmd); break; case ACTION_REPREP: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 501f684..e33c835 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -883,6 +883,22 @@ static inline bool blk_end_request(struct request *rq, int error, } /** + * blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. + */ +static inline void blk_end_request_all(struct request *rq, int error) +{ + bool pending; + + pending = blk_end_request(rq, error, blk_rq_bytes(rq)); + BUG_ON(pending); +} + +/** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed * @error: %0 for success, < %0 for error @@ -902,6 +918,22 @@ static inline bool __blk_end_request(struct request *rq, int error, } /** + * __blk_end_request_all - Helper function for drives to finish the request. + * @rq: the request to finish + * @err: %0 for success, < %0 for error + * + * Description: + * Completely finish @rq. Must be called with queue lock held. + */ +static inline void __blk_end_request_all(struct request *rq, int error) +{ + bool pending; + + pending = __blk_end_request(rq, error, blk_rq_bytes(rq)); + BUG_ON(pending); +} + +/** * end_request - end I/O on the current segment of the request * @rq: the request being processed * @uptodate: error value or %0/%1 uptodate flag -- cgit v0.10.2 From f06d9a2b52e246a66b606130cea3f0d7b7be17a7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:19 +0900 Subject: block: replace end_request() with [__]blk_end_request_cur() end_request() has been kept around for backward compatibility; however, it's about time for it to go away. * There aren't too many users left. * Its use of @updtodate is pretty confusing. * In some cases, newer code ends up using mixture of end_request() and [__]blk_end_request[_all](), which is way too confusing. So, add [__]blk_end_request_cur() and replace end_request() with it. Most conversions are straightforward. Noteworthy ones are... * paride/pcd: next_request() updated to take 0/-errno instead of 1/0. * paride/pf: pf_end_request() and next_request() updated to take 0/-errno instead of 1/0. * xd: xd_readwrite() updated to return 0/-errno instead of 1/0. * mtd/mtd_blkdevs: blktrans_discard_request() updated to return 0/-errno instead of 1/0. Unnecessary local variable res initialization removed from mtd_blktrans_thread(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: Joerg Dorchain Acked-by: Geert Uytterhoeven Acked-by: Grant Likely Acked-by: Laurent Vivier Cc: Tim Waugh Cc: Stephen Rothwell Cc: Paul Mackerras Cc: Jeremy Fitzhardinge Cc: Markus Lidel Cc: David Woodhouse Cc: Pete Zaitcev Cc: unsik Kim diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 8df436f..b99a2a6 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1359,7 +1359,7 @@ static void redo_fd_request(void) #endif block = CURRENT->sector + cnt; if ((int)block > floppy->blocks) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } @@ -1373,11 +1373,11 @@ static void redo_fd_request(void) if ((rq_data_dir(CURRENT) != READ) && (rq_data_dir(CURRENT) != WRITE)) { printk(KERN_WARNING "do_fd_request: unknown command\n"); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } if (get_track(drive, track) == -1) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } @@ -1391,7 +1391,7 @@ static void redo_fd_request(void) /* keep the drive spinning while writes are scheduled */ if (!fd_motor_on(drive)) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } /* @@ -1410,7 +1410,7 @@ static void redo_fd_request(void) CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; - end_request(CURRENT, 1); + __blk_end_request_cur(CURRENT, 0); goto repeat; } diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 4234c11..44a8702 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -612,7 +612,7 @@ static void fd_error( void ) CURRENT->errors++; if (CURRENT->errors >= MAX_ERRORS) { printk(KERN_ERR "fd%d: too many errors.\n", SelectedDrive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); } else if (CURRENT->errors == RECALIBRATE_ERRORS) { printk(KERN_WARNING "fd%d: recalibrating\n", SelectedDrive ); @@ -734,7 +734,7 @@ static void do_fd_action( int drive ) /* all sectors finished */ CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; - end_request(CURRENT, 1); + __blk_end_request_cur(CURRENT, 0); redo_fd_request(); return; } @@ -1141,7 +1141,7 @@ static void fd_rwsec_done1(int status) /* all sectors finished */ CURRENT->nr_sectors -= CURRENT->current_nr_sectors; CURRENT->sector += CURRENT->current_nr_sectors; - end_request(CURRENT, 1); + __blk_end_request_cur(CURRENT, 0); redo_fd_request(); } return; @@ -1414,7 +1414,7 @@ repeat: if (!UD.connected) { /* drive not connected */ printk(KERN_ERR "Unknown Device: fd%d\n", drive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } @@ -1430,12 +1430,12 @@ repeat: /* user supplied disk type */ if (--type >= NUM_DISK_MINORS) { printk(KERN_WARNING "fd%d: invalid disk format", drive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } if (minor2disktype[type].drive_types > DriveType) { printk(KERN_WARNING "fd%d: unsupported disk format", drive ); - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } type = minor2disktype[type].index; @@ -1445,7 +1445,7 @@ repeat: } if (CURRENT->sector + 1 > UDT->blocks) { - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); goto repeat; } diff --git a/drivers/block/hd.c b/drivers/block/hd.c index baaa9e4..5cb300b 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -410,7 +410,7 @@ static void bad_rw_intr(void) if (req != NULL) { struct hd_i_struct *disk = req->rq_disk->private_data; if (++req->errors >= MAX_ERRORS || (hd_error & BBD_ERR)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); disk->special_op = disk->recalibrate = 1; } else if (req->errors % RESET_FREQ == 0) reset = 1; @@ -466,7 +466,7 @@ ok_to_read: req->buffer+512); #endif if (req->current_nr_sectors <= 0) - end_request(req, 1); + __blk_end_request_cur(req, 0); if (i > 0) { SET_HANDLER(&read_intr); return; @@ -505,7 +505,7 @@ ok_to_write: --req->current_nr_sectors; req->buffer += 512; if (!i || (req->bio && req->current_nr_sectors <= 0)) - end_request(req, 1); + __blk_end_request_cur(req, 0); if (i > 0) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); @@ -548,7 +548,7 @@ static void hd_times_out(unsigned long dummy) #ifdef DEBUG printk("%s: too many errors\n", name); #endif - end_request(CURRENT, 0); + __blk_end_request_cur(CURRENT, -EIO); } hd_request(); spin_unlock_irq(hd_queue->queue_lock); @@ -563,7 +563,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req) } if (disk->head > 16) { printk("%s: cannot handle device with more than 16 heads - giving up\n", req->rq_disk->disk_name); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } disk->special_op = 0; return 1; @@ -607,7 +607,7 @@ repeat: ((block+nsect) > get_capacity(req->rq_disk))) { printk("%s: bad access: block=%d, count=%d\n", req->rq_disk->disk_name, block, nsect); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); goto repeat; } @@ -647,7 +647,7 @@ repeat: break; default: printk("unknown hd-command\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; } } diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index f389835..408c2bd 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -285,7 +285,7 @@ static void mg_bad_rw_intr(struct mg_host *host) if (req != NULL) if (++req->errors >= MG_MAX_ERRORS || host->error == MG_ERR_TIMEOUT) - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } static unsigned int mg_out(struct mg_host *host, @@ -351,7 +351,7 @@ static void mg_read(struct request *req) if (req->current_nr_sectors <= 0) { MG_DBG("remain : %d sects\n", remains); - end_request(req, 1); + __blk_end_request_cur(req, 0); if (remains > 0) req = elv_next_request(host->breq); } @@ -395,7 +395,7 @@ static void mg_write(struct request *req) if (req->current_nr_sectors <= 0) { MG_DBG("remain : %d sects\n", remains); - end_request(req, 1); + __blk_end_request_cur(req, 0); if (remains > 0) req = elv_next_request(host->breq); } @@ -448,7 +448,7 @@ ok_to_read: /* let know if current segment done */ if (req->current_nr_sectors <= 0) - end_request(req, 1); + __blk_end_request_cur(req, 0); /* set handler if read remains */ if (i > 0) { @@ -497,7 +497,7 @@ ok_to_write: /* let know if current segment or all done */ if (!i || (req->bio && req->current_nr_sectors <= 0)) - end_request(req, 1); + __blk_end_request_cur(req, 0); /* write 1 sector and set handler if remains */ if (i > 0) { @@ -563,7 +563,7 @@ static void mg_request_poll(struct request_queue *q) default: printk(KERN_WARNING "%s:%d unknown command\n", __func__, __LINE__); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; } } @@ -617,7 +617,7 @@ static unsigned int mg_issue_req(struct request *req, default: printk(KERN_WARNING "%s:%d unknown command\n", __func__, __LINE__); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; } return MG_ERR_NONE; @@ -655,7 +655,7 @@ static void mg_request(struct request_queue *q) "%s: bad access: sector=%d, count=%d\n", req->rq_disk->disk_name, sect_num, sect_cnt); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index e91d4b4..9fd57c2 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -735,16 +735,16 @@ static void do_pcd_request(struct request_queue * q) ps_set_intr(do_pcd_read, NULL, 0, nice); return; } else - end_request(pcd_req, 0); + __blk_end_request_cur(pcd_req, -EIO); } } -static inline void next_request(int success) +static inline void next_request(int err) { unsigned long saved_flags; spin_lock_irqsave(&pcd_lock, saved_flags); - end_request(pcd_req, success); + __blk_end_request_cur(pcd_req, err); pcd_busy = 0; do_pcd_request(pcd_queue); spin_unlock_irqrestore(&pcd_lock, saved_flags); @@ -781,7 +781,7 @@ static void pcd_start(void) if (pcd_command(pcd_current, rd_cmd, 2048, "read block")) { pcd_bufblk = -1; - next_request(0); + next_request(-EIO); return; } @@ -796,7 +796,7 @@ static void do_pcd_read(void) pcd_retries = 0; pcd_transfer(); if (!pcd_count) { - next_request(1); + next_request(0); return; } @@ -815,7 +815,7 @@ static void do_pcd_read_drq(void) return; } pcd_bufblk = -1; - next_request(0); + next_request(-EIO); return; } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 9299455..0732df4 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -410,7 +410,8 @@ static void run_fsm(void) pd_claimed = 0; phase = NULL; spin_lock_irqsave(&pd_lock, saved_flags); - end_request(pd_req, res); + __blk_end_request_cur(pd_req, + res == Ok ? 0 : -EIO); pd_req = elv_next_request(pd_queue); if (!pd_req) stop = 1; @@ -477,7 +478,7 @@ static int pd_next_buf(void) if (pd_count) return 0; spin_lock_irqsave(&pd_lock, saved_flags); - end_request(pd_req, 1); + __blk_end_request_cur(pd_req, 0); pd_count = pd_req->current_nr_sectors; pd_buf = pd_req->buffer; spin_unlock_irqrestore(&pd_lock, saved_flags); diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index bef3b99..3871e35 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -750,10 +750,10 @@ static int pf_ready(void) static struct request_queue *pf_queue; -static void pf_end_request(int uptodate) +static void pf_end_request(int err) { if (pf_req) { - end_request(pf_req, uptodate); + __blk_end_request_cur(pf_req, err); pf_req = NULL; } } @@ -773,7 +773,7 @@ repeat: pf_count = pf_req->current_nr_sectors; if (pf_block + pf_count > get_capacity(pf_req->rq_disk)) { - pf_end_request(0); + pf_end_request(-EIO); goto repeat; } @@ -788,7 +788,7 @@ repeat: pi_do_claimed(pf_current->pi, do_pf_write); else { pf_busy = 0; - pf_end_request(0); + pf_end_request(-EIO); goto repeat; } } @@ -805,7 +805,7 @@ static int pf_next_buf(void) return 1; if (!pf_count) { spin_lock_irqsave(&pf_spin_lock, saved_flags); - pf_end_request(1); + pf_end_request(0); pf_req = elv_next_request(pf_queue); spin_unlock_irqrestore(&pf_spin_lock, saved_flags); if (!pf_req) @@ -816,12 +816,12 @@ static int pf_next_buf(void) return 0; } -static inline void next_request(int success) +static inline void next_request(int err) { unsigned long saved_flags; spin_lock_irqsave(&pf_spin_lock, saved_flags); - pf_end_request(success); + pf_end_request(err); pf_busy = 0; do_pf_request(pf_queue); spin_unlock_irqrestore(&pf_spin_lock, saved_flags); @@ -844,7 +844,7 @@ static void do_pf_read_start(void) pi_do_claimed(pf_current->pi, do_pf_read_start); return; } - next_request(0); + next_request(-EIO); return; } pf_mask = STAT_DRQ; @@ -863,7 +863,7 @@ static void do_pf_read_drq(void) pi_do_claimed(pf_current->pi, do_pf_read_start); return; } - next_request(0); + next_request(-EIO); return; } pi_read_block(pf_current->pi, pf_buf, 512); @@ -871,7 +871,7 @@ static void do_pf_read_drq(void) break; } pi_disconnect(pf_current->pi); - next_request(1); + next_request(0); } static void do_pf_write(void) @@ -890,7 +890,7 @@ static void do_pf_write_start(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(0); + next_request(-EIO); return; } @@ -903,7 +903,7 @@ static void do_pf_write_start(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(0); + next_request(-EIO); return; } pi_write_block(pf_current->pi, pf_buf, 512); @@ -923,11 +923,11 @@ static void do_pf_write_done(void) pi_do_claimed(pf_current->pi, do_pf_write_start); return; } - next_request(0); + next_request(-EIO); return; } pi_disconnect(pf_current->pi); - next_request(1); + next_request(0); } static int __init pf_init(void) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index bccc42b..d23b54b 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -158,7 +158,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: %s failed %d\n", __func__, __LINE__, op, res); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); return 0; } @@ -180,7 +180,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, if (res) { dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", __func__, __LINE__, res); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); return 0; } @@ -205,7 +205,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, break; } else { blk_dump_rq_flags(req, DEVICE_NAME " bad request"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } } diff --git a/drivers/block/swim.c b/drivers/block/swim.c index d22cc38..6544a7b 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -532,39 +532,39 @@ static void redo_fd_request(struct request_queue *q) fs = req->rq_disk->private_data; if (req->sector < 0 || req->sector >= fs->total_secs) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (req->current_nr_sectors == 0) { - end_request(req, 1); + __blk_end_request_cur(req, 0); continue; } if (!fs->disk_in) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (rq_data_dir(req) == WRITE) { if (fs->write_protected) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } } switch (rq_data_dir(req)) { case WRITE: /* NOT IMPLEMENTED */ - end_request(req, 0); + __blk_end_request_cur(req, -EIO); break; case READ: if (floppy_read_sectors(fs, req->sector, req->current_nr_sectors, req->buffer)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } req->nr_sectors -= req->current_nr_sectors; req->sector += req->current_nr_sectors; req->buffer += req->current_nr_sectors * 512; - end_request(req, 1); + __blk_end_request_cur(req, 0); break; } } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 6129653..5904f7b 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -320,15 +320,15 @@ static void start_request(struct floppy_state *fs) #endif if (req->sector < 0 || req->sector >= fs->total_secs) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (req->current_nr_sectors == 0) { - end_request(req, 1); + __blk_end_request_cur(req, 0); continue; } if (fs->ejected) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } @@ -336,7 +336,7 @@ static void start_request(struct floppy_state *fs) if (fs->write_prot < 0) fs->write_prot = swim3_readbit(fs, WRITE_PROT); if (fs->write_prot) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } } @@ -508,7 +508,7 @@ static void act(struct floppy_state *fs) case do_transfer: if (fs->cur_cyl != fs->req_cyl) { if (fs->retries > 5) { - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; return; } @@ -540,7 +540,7 @@ static void scan_timeout(unsigned long data) out_8(&sw->intr_enable, 0); fs->cur_cyl = -1; if (fs->retries > 5) { - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } else { @@ -559,7 +559,7 @@ static void seek_timeout(unsigned long data) out_8(&sw->select, RELAX); out_8(&sw->intr_enable, 0); printk(KERN_ERR "swim3: seek timeout\n"); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } @@ -583,7 +583,7 @@ static void settle_timeout(unsigned long data) return; } printk(KERN_ERR "swim3: seek settle timeout\n"); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } @@ -615,7 +615,7 @@ static void xfer_timeout(unsigned long data) fd_req->current_nr_sectors -= s; printk(KERN_ERR "swim3: timeout %sing sector %ld\n", (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } @@ -646,7 +646,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) printk(KERN_ERR "swim3: seen sector but cyl=ff?\n"); fs->cur_cyl = -1; if (fs->retries > 5) { - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); } else { @@ -731,7 +731,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) printk("swim3: error %sing block %ld (err=%x)\n", rq_data_dir(fd_req) == WRITE? "writ": "read", (long)fd_req->sector, err); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; } } else { @@ -740,7 +740,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) printk(KERN_ERR "swim3: fd dma: stat=%x resid=%d\n", stat, resid); printk(KERN_ERR " state=%d, dir=%x, intr=%x, err=%x\n", fs->state, rq_data_dir(fd_req), intr, err); - end_request(fd_req, 0); + __blk_end_request_cur(fd_req, -EIO); fs->state = idle; start_request(fs); break; @@ -749,7 +749,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) fd_req->current_nr_sectors -= fs->scount; fd_req->buffer += fs->scount * 512; if (fd_req->current_nr_sectors <= 0) { - end_request(fd_req, 1); + __blk_end_request_cur(fd_req, 0); fs->state = idle; } else { fs->req_sector += fs->scount; diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 64b496f..6f6ad82 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -314,21 +314,22 @@ static void do_xd_request (struct request_queue * q) int retry; if (!blk_fs_request(req)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (block + count > get_capacity(req->rq_disk)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (rw != READ && rw != WRITE) { printk("do_xd_request: unknown request\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } for (retry = 0; (retry < XD_RETRIES) && !res; retry++) res = xd_readwrite(rw, disk, req->buffer, block, count); - end_request(req, res); /* wrap up, 0 = fail, 1 = success */ + /* wrap up, 0 = success, -errno = fail */ + __blk_end_request_cur(req, res); } } @@ -418,7 +419,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_ printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write")); xd_recalibrate(drive); spin_lock_irq(&xd_lock); - return (0); + return -EIO; case 2: if (sense[0] & 0x30) { printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing")); @@ -439,7 +440,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_ else printk(" - no valid disk address\n"); spin_lock_irq(&xd_lock); - return (0); + return -EIO; } if (xd_dma_buffer) for (i=0; i < (temp * 0x200); i++) @@ -448,7 +449,7 @@ static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_ count -= temp, buffer += temp * 0x200, block += temp; } spin_lock_irq(&xd_lock); - return (1); + return 0; } /* xd_recalibrate: recalibrate a given drive and reset controller if necessary */ diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index cd6cfe3..b456447 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -302,7 +302,7 @@ static void do_blkif_request(struct request_queue *rq) while ((req = elv_next_request(rq)) != NULL) { info = req->rq_disk->private_data; if (!blk_fs_request(req)) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 4aecf5d..b1e1d7e 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -466,7 +466,7 @@ struct request *ace_get_next_request(struct request_queue * q) while ((req = elv_next_request(q)) != NULL) { if (blk_fs_request(req)) break; - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } return req; } @@ -494,7 +494,7 @@ static void ace_fsm_dostate(struct ace_device *ace) /* Drop all pending requests */ while ((req = elv_next_request(ace->queue)) != NULL) - end_request(req, 0); + __blk_end_request_cur(req, -EIO); /* Drop back to IDLE state and notify waiters */ ace->fsm_state = ACE_FSM_STATE_IDLE; diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 80754cd..b66ad58 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -77,7 +77,7 @@ static void do_z2_request(struct request_queue *q) if (start + len > z2ram_size) { printk( KERN_ERR DEVICE_NAME ": bad access: block=%lu, count=%u\n", req->sector, req->current_nr_sectors); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } while (len) { @@ -93,7 +93,7 @@ static void do_z2_request(struct request_queue *q) start += size; len -= size; } - end_request(req, 1); + __blk_end_request_cur(req, 0); } } diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index fee9a9e..cab2b1f 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -654,17 +654,17 @@ static void gdrom_request(struct request_queue *rq) while ((req = elv_next_request(rq)) != NULL) { if (!blk_fs_request(req)) { printk(KERN_DEBUG "GDROM: Non-fs request ignored\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } if (rq_data_dir(req) != READ) { printk(KERN_NOTICE "GDROM: Read only device -"); printk(" write request ignored\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } if (req->nr_sectors) gdrom_request_handler_dma(req); else - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } } diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index a443e13..221317e 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -923,7 +923,7 @@ static void i2o_block_request_fn(struct request_queue *q) break; } } else - end_request(req, 0); + __blk_end_request_cur(req, -EIO); } }; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index a49a9c8..76c4c8d 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -54,33 +54,33 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && req->cmd[0] == REQ_LB_OP_DISCARD) - return !tr->discard(dev, block, nsect); + return tr->discard(dev, block, nsect); if (!blk_fs_request(req)) - return 0; + return -EIO; if (req->sector + req->current_nr_sectors > get_capacity(req->rq_disk)) - return 0; + return -EIO; switch(rq_data_dir(req)) { case READ: for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->readsect(dev, block, buf)) - return 0; - return 1; + return -EIO; + return 0; case WRITE: if (!tr->writesect) - return 0; + return -EIO; for (; nsect > 0; nsect--, block++, buf += tr->blksize) if (tr->writesect(dev, block, buf)) - return 0; - return 1; + return -EIO; + return 0; default: printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req)); - return 0; + return -EIO; } } @@ -96,7 +96,7 @@ static int mtd_blktrans_thread(void *arg) while (!kthread_should_stop()) { struct request *req; struct mtd_blktrans_dev *dev; - int res = 0; + int res; req = elv_next_request(rq); @@ -119,7 +119,7 @@ static int mtd_blktrans_thread(void *arg) spin_lock_irq(rq->queue_lock); - end_request(req, res); + __blk_end_request_cur(req, res); } spin_unlock_irq(rq->queue_lock); diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index a85ad05..0961788 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -192,25 +192,25 @@ static void jsfd_do_request(struct request_queue *q) size_t len = req->current_nr_sectors << 9; if ((offset + len) > jdp->dsize) { - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if (rq_data_dir(req) != READ) { printk(KERN_ERR "jsfd: write\n"); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } if ((jdp->dbase & 0xff000000) != 0x20000000) { printk(KERN_ERR "jsfd: bad base %x\n", (int)jdp->dbase); - end_request(req, 0); + __blk_end_request_cur(req, -EIO); continue; } jsfd_read(req->buffer, jdp->dbase + offset, len); - end_request(req, 1); + __blk_end_request_cur(req, 0); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e33c835..cfeb3c2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,9 +845,8 @@ extern unsigned int blk_rq_cur_bytes(struct request *rq); * blk_update_request() completes given number of bytes and updates * the request without completing it. * - * blk_end_request() and friends. __blk_end_request() and - * end_request() must be called with the request queue spinlock - * acquired. + * blk_end_request() and friends. __blk_end_request() must be called + * with the request queue spinlock acquired. * * Several drivers define their own end_request and call * blk_end_request() for parts of the original function. @@ -899,6 +898,19 @@ static inline void blk_end_request_all(struct request *rq, int error) } /** + * blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error + * + * Description: + * Complete the current consecutively mapped chunk from @rq. + */ +static inline void blk_end_request_cur(struct request *rq, int error) +{ + blk_end_request(rq, error, rq->hard_cur_sectors << 9); +} + +/** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed * @error: %0 for success, < %0 for error @@ -934,29 +946,17 @@ static inline void __blk_end_request_all(struct request *rq, int error) } /** - * end_request - end I/O on the current segment of the request - * @rq: the request being processed - * @uptodate: error value or %0/%1 uptodate flag + * __blk_end_request_cur - Helper function to finish the current request chunk. + * @rq: the request to finish the current chunk for + * @err: %0 for success, < %0 for error * * Description: - * Ends I/O on the current segment of a request. If that is the only - * remaining segment, the request is also completed and freed. - * - * This is a remnant of how older block drivers handled I/O completions. - * Modern drivers typically end I/O on the full request in one go, unless - * they have a residual value to account for. For that case this function - * isn't really useful, unless the residual just happens to be the - * full current segment. In other words, don't use this function in new - * code. Use blk_end_request() or __blk_end_request() to end a request. - **/ -static inline void end_request(struct request *rq, int uptodate) + * Complete the current consecutively mapped chunk from @rq. Must + * be called with queue lock held. + */ +static inline void __blk_end_request_cur(struct request *rq, int error) { - int error = 0; - - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; - - __blk_end_bidi_request(rq, error, rq->hard_cur_sectors << 9, 0); + __blk_end_request(rq, error, rq->hard_cur_sectors << 9); } extern void blk_complete_request(struct request *); -- cgit v0.10.2 From ec24751a6b57e1373a12361e581b2458bc9bb791 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:20 +0900 Subject: arm-omap: don't abuse rq->data omap mailbox uses rq->data as the second opaque pointer to carry mbox_msg_t and rq->special message argument which is needed only for tx. Add and use omap_msg_tx_data struct for tx and use rq->special for mbox_msg_t for rx such that only rq->special is used as opaque pointer. [ Impact: cleanup rq->data usage, extra kmalloc in msg_send ] Signed-off-by: Tejun Heo Cc: Russell King diff --git a/arch/arm/plat-omap/mailbox.c b/arch/arm/plat-omap/mailbox.c index cf81bad..538ba75 100644 --- a/arch/arm/plat-omap/mailbox.c +++ b/arch/arm/plat-omap/mailbox.c @@ -147,24 +147,40 @@ static int __mbox_msg_send(struct omap_mbox *mbox, mbox_msg_t msg, void *arg) return ret; } +struct omap_msg_tx_data { + mbox_msg_t msg; + void *arg; +}; + +static void omap_msg_tx_end_io(struct request *rq, int error) +{ + kfree(rq->special); + __blk_put_request(rq->q, rq); +} + int omap_mbox_msg_send(struct omap_mbox *mbox, mbox_msg_t msg, void* arg) { + struct omap_msg_tx_data *tx_data; struct request *rq; struct request_queue *q = mbox->txq->queue; - int ret = 0; + + tx_data = kmalloc(sizeof(*tx_data), GFP_ATOMIC); + if (unlikely(!tx_data)) + return -ENOMEM; rq = blk_get_request(q, WRITE, GFP_ATOMIC); if (unlikely(!rq)) { - ret = -ENOMEM; - goto fail; + kfree(tx_data); + return -ENOMEM; } - rq->data = (void *)msg; - blk_insert_request(q, rq, 0, arg); + tx_data->msg = msg; + tx_data->arg = arg; + rq->end_io = omap_msg_tx_end_io; + blk_insert_request(q, rq, 0, tx_data); schedule_work(&mbox->txq->work); - fail: - return ret; + return 0; } EXPORT_SYMBOL(omap_mbox_msg_send); @@ -178,6 +194,8 @@ static void mbox_tx_work(struct work_struct *work) struct request_queue *q = mbox->txq->queue; while (1) { + struct omap_msg_tx_data *tx_data; + spin_lock(q->queue_lock); rq = elv_next_request(q); spin_unlock(q->queue_lock); @@ -185,7 +203,9 @@ static void mbox_tx_work(struct work_struct *work) if (!rq) break; - ret = __mbox_msg_send(mbox, (mbox_msg_t) rq->data, rq->special); + tx_data = rq->special; + + ret = __mbox_msg_send(mbox, tx_data->msg, tx_data->arg); if (ret) { enable_mbox_irq(mbox, IRQ_TX); return; @@ -222,7 +242,7 @@ static void mbox_rx_work(struct work_struct *work) if (!rq) break; - msg = (mbox_msg_t) rq->data; + msg = (mbox_msg_t)rq->special; blk_end_request_all(rq, 0); mbox->rxq->callback((void *)msg); } @@ -260,7 +280,6 @@ static void __mbox_rx_interrupt(struct omap_mbox *mbox) goto nomem; msg = mbox_fifo_read(mbox); - rq->data = (void *)msg; if (unlikely(mbox_seq_test(mbox, msg))) { pr_info("mbox: Illegal seq bit!(%08x)\n", msg); @@ -268,7 +287,7 @@ static void __mbox_rx_interrupt(struct omap_mbox *mbox) mbox->err_notify(); } - blk_insert_request(q, rq, 0, NULL); + blk_insert_request(q, rq, 0, (void *)msg); if (mbox->ops->type == OMAP_MBOX_TYPE1) break; } @@ -331,7 +350,7 @@ omap_mbox_read(struct device *dev, struct device_attribute *attr, char *buf) if (!rq) break; - *p = (mbox_msg_t) rq->data; + *p = (mbox_msg_t)rq->special; blk_end_request_all(rq, 0); -- cgit v0.10.2 From 731ec497e5888c6792ad62613ae9be97eebcd7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 23 Apr 2009 11:05:20 +0900 Subject: block: kill rq->data Now that all block request data transfer is done via bio, rq->data isn't used. Kill it. While at it, make the roles of rq->special and buffer clear. [ Impact: drop now unncessary field from struct request ] Signed-off-by: Tejun Heo Cc: Boaz Harrosh diff --git a/block/blk-core.c b/block/blk-core.c index 0520cc7..6dd180c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -189,10 +189,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg) (unsigned long long)rq->sector, rq->nr_sectors, rq->current_nr_sectors); - printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n", + printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", rq->bio, rq->biotail, - rq->buffer, rq->data, - rq->data_len); + rq->buffer, rq->data_len); if (blk_pc_request(rq)) { printk(KERN_INFO " cdb: "); diff --git a/block/blk-map.c b/block/blk-map.c index f103729..694fefa 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -156,7 +156,7 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, if (!bio_flagged(bio, BIO_USER_MAPPED)) rq->cmd_flags |= REQ_COPY_USER; - rq->buffer = rq->data = NULL; + rq->buffer = NULL; return 0; unmap_rq: blk_rq_unmap_user(bio); @@ -235,7 +235,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, blk_queue_bounce(q, &bio); bio_get(bio); blk_rq_bio_prep(q, rq, bio); - rq->buffer = rq->data = NULL; + rq->buffer = NULL; return 0; } EXPORT_SYMBOL(blk_rq_map_user_iov); @@ -313,7 +313,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, blk_rq_bio_prep(q, rq, bio); blk_queue_bounce(q, &rq->bio); - rq->buffer = rq->data = NULL; + rq->buffer = NULL; return 0; } EXPORT_SYMBOL(blk_rq_map_kern); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 82a0ca2..bb9aa43 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -500,7 +500,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq = blk_get_request(q, WRITE, __GFP_WAIT); rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->data = NULL; rq->data_len = 0; rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 756ac7c..aa9fc57 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1088,7 +1088,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) return ret; } else { BUG_ON(req->data_len); - BUG_ON(req->data); memset(&cmd->sdb, 0, sizeof(cmd->sdb)); req->buffer = NULL; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfeb3c2..12c545e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -211,8 +211,8 @@ struct request { unsigned short ioprio; - void *special; - char *buffer; + void *special; /* opaque pointer available for LLD use */ + char *buffer; /* kaddr of the current segment if available */ int tag; int errors; @@ -229,7 +229,6 @@ struct request { unsigned int data_len; unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; - void *data; void *sense; unsigned long deadline; -- cgit v0.10.2 From c2553b5844b06910435e40cfab9e6f384840cb97 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Apr 2009 08:10:11 +0200 Subject: block: make blk_do_io_stat() do the full "is this rq accountable" checks We currently check for file system requests outside of blk_do_io_stat(rq), but we may as well just include it. Signed-off-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index 6dd180c..1e3b97f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -68,7 +68,7 @@ static void drive_stat_acct(struct request *rq, int new_io) int rw = rq_data_dir(rq); int cpu; - if (!blk_fs_request(rq) || !blk_do_io_stat(rq)) + if (!blk_do_io_stat(rq)) return; cpu = part_stat_lock(); @@ -1639,10 +1639,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request); static void blk_account_io_completion(struct request *req, unsigned int bytes) { - if (!blk_do_io_stat(req)) - return; - - if (blk_fs_request(req)) { + if (blk_do_io_stat(req)) { const int rw = rq_data_dir(req); struct hd_struct *part; int cpu; @@ -1656,15 +1653,12 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { - if (!blk_do_io_stat(req)) - return; - /* * Account IO completion. bar_rq isn't accounted as a normal * IO on queueing nor completion. Accounting the containing * request is enough. */ - if (blk_fs_request(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; diff --git a/block/blk.h b/block/blk.h index 9b2c324..404c10b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -149,9 +149,16 @@ static inline int blk_cpu_to_group(int cpu) #endif } +/* + * Contribute to IO statistics IFF: + * + * a) it's attached to a gendisk, and + * b) the queue had IO stats enabled when this request was started, and + * c) it's a file system request + */ static inline int blk_do_io_stat(struct request *rq) { - return rq->rq_disk && blk_rq_io_stat(rq); + return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq); } #endif -- cgit v0.10.2 From c69d48540c201394d08cb4d48b905e001313d9b8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Apr 2009 08:12:19 +0200 Subject: block: include discard requests in IO accounting We currently don't do merging on discard requests, but we potentially could. If we do, then we need to include discard requests in the IO accounting, or merging would end up decrementing in_flight IO counters for an IO which never incremented them. So enable accounting for discard requests. Problem found by Nikanth Karthikesan Signed-off-by: Jens Axboe diff --git a/block/blk.h b/block/blk.h index 404c10b..5111559 100644 --- a/block/blk.h +++ b/block/blk.h @@ -158,7 +158,8 @@ static inline int blk_cpu_to_group(int cpu) */ static inline int blk_do_io_stat(struct request *rq) { - return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq); + return rq->rq_disk && blk_rq_io_stat(rq) && blk_fs_request(rq) && + blk_discard_rq(rq); } #endif -- cgit v0.10.2 From 9eb55b030c4b3227334ee4482402096cd1d1a6fe Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Mon, 27 Apr 2009 14:53:54 +0200 Subject: block: catch trying to use more bits than request->cmd_flags has Signed-off-by: Nikanth Karthikesan Signed-off-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index 1e3b97f..394c5bd 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2097,6 +2097,9 @@ EXPORT_SYMBOL(kblockd_schedule_work); int __init blk_dev_init(void) { + BUILD_BUG_ON(__REQ_NR_BITS > 8 * + sizeof(((struct request *)0)->cmd_flags)); + kblockd_workqueue = create_workqueue("kblockd"); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); -- cgit v0.10.2 From 9fd8d0e1bcb848257968d9a7d73ca4d890ea8bd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:04 +0900 Subject: block: make blk_end_request_cur() return bool In the process of mindlessly copying [__]blk_end_request_all(), [__]blk_end_request_cur() ended up returning void even though they're partial completion functions. Fix it. [ Impact: fix braindead API ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 12c545e..3a5b1bd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -903,10 +903,14 @@ static inline void blk_end_request_all(struct request *rq, int error) * * Description: * Complete the current consecutively mapped chunk from @rq. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request */ -static inline void blk_end_request_cur(struct request *rq, int error) +static inline bool blk_end_request_cur(struct request *rq, int error) { - blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return blk_end_request(rq, error, rq->hard_cur_sectors << 9); } /** @@ -952,10 +956,14 @@ static inline void __blk_end_request_all(struct request *rq, int error) * Description: * Complete the current consecutively mapped chunk from @rq. Must * be called with queue lock held. + * + * Return: + * %false - we are done with this request + * %true - still buffers pending for this request */ -static inline void __blk_end_request_cur(struct request *rq, int error) +static inline bool __blk_end_request_cur(struct request *rq, int error) { - __blk_end_request(rq, error, rq->hard_cur_sectors << 9); + return __blk_end_request(rq, error, rq->hard_cur_sectors << 9); } extern void blk_complete_request(struct request *); -- cgit v0.10.2 From 4c94dece1baf320d925cedb231489c4e0358ac5a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:05 +0900 Subject: block: don't init rq fields unnecessarily blk_get_request() always returns properly zeroed requests. Don't set fields to zero/NULL unnecessarily. [ Impact: cleanup ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index bb9aa43..58cf456 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -500,8 +500,6 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq = blk_get_request(q, WRITE, __GFP_WAIT); rq->cmd_type = REQ_TYPE_BLOCK_PC; - rq->data_len = 0; - rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; rq->cmd[0] = cmd; rq->cmd[4] = data; -- cgit v0.10.2 From 5b5c5d12b91cb6b2a2967f06aef35d59008dc2e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:06 +0900 Subject: amiflop,ataflop,xd,mg_disk: clean up unnecessary stuff from block drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rq_data_dir() can only be READ or WRITE and rq->sector and nr_sectors are always automatically updated after partial request completion. Don't worry about rq_data_dir() not being either READ or WRITE or manually update sector and nr_sectors. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Jörg Dorchain Cc: Geert Uytterhoeven Cc: unsik Kim Signed-off-by: Jens Axboe diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index b99a2a6..8ff95f2 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1371,11 +1371,6 @@ static void redo_fd_request(void) "0x%08lx\n", track, sector, data); #endif - if ((rq_data_dir(CURRENT) != READ) && (rq_data_dir(CURRENT) != WRITE)) { - printk(KERN_WARNING "do_fd_request: unknown command\n"); - __blk_end_request_cur(CURRENT, -EIO); - goto repeat; - } if (get_track(drive, track) == -1) { __blk_end_request_cur(CURRENT, -EIO); goto repeat; @@ -1407,8 +1402,6 @@ static void redo_fd_request(void) break; } } - CURRENT->nr_sectors -= CURRENT->current_nr_sectors; - CURRENT->sector += CURRENT->current_nr_sectors; __blk_end_request_cur(CURRENT, 0); goto repeat; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 44a8702..2506728 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -732,8 +732,6 @@ static void do_fd_action( int drive ) } else { /* all sectors finished */ - CURRENT->nr_sectors -= CURRENT->current_nr_sectors; - CURRENT->sector += CURRENT->current_nr_sectors; __blk_end_request_cur(CURRENT, 0); redo_fd_request(); return; @@ -1139,8 +1137,6 @@ static void fd_rwsec_done1(int status) } else { /* all sectors finished */ - CURRENT->nr_sectors -= CURRENT->current_nr_sectors; - CURRENT->sector += CURRENT->current_nr_sectors; __blk_end_request_cur(CURRENT, 0); redo_fd_request(); } diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 408c2bd..2c00ad9 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -560,11 +560,6 @@ static void mg_request_poll(struct request_queue *q) case WRITE: mg_write(req); break; - default: - printk(KERN_WARNING "%s:%d unknown command\n", - __func__, __LINE__); - __blk_end_request_cur(req, -EIO); - break; } } } @@ -614,11 +609,6 @@ static unsigned int mg_issue_req(struct request *req, outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); break; - default: - printk(KERN_WARNING "%s:%d unknown command\n", - __func__, __LINE__); - __blk_end_request_cur(req, -EIO); - break; } return MG_ERR_NONE; } diff --git a/drivers/block/xd.c b/drivers/block/xd.c index 6f6ad82..14be4c1 100644 --- a/drivers/block/xd.c +++ b/drivers/block/xd.c @@ -308,7 +308,6 @@ static void do_xd_request (struct request_queue * q) while ((req = elv_next_request(q)) != NULL) { unsigned block = req->sector; unsigned count = req->nr_sectors; - int rw = rq_data_dir(req); XD_INFO *disk = req->rq_disk->private_data; int res = 0; int retry; @@ -321,13 +320,9 @@ static void do_xd_request (struct request_queue * q) __blk_end_request_cur(req, -EIO); continue; } - if (rw != READ && rw != WRITE) { - printk("do_xd_request: unknown request\n"); - __blk_end_request_cur(req, -EIO); - continue; - } for (retry = 0; (retry < XD_RETRIES) && !res; retry++) - res = xd_readwrite(rw, disk, req->buffer, block, count); + res = xd_readwrite(rq_data_dir(req), disk, req->buffer, + block, count); /* wrap up, 0 = success, -errno = fail */ __blk_end_request_cur(req, res); } -- cgit v0.10.2 From cd4c34ebec155e5c10f897d9bebf618248121e70 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:07 +0900 Subject: ps3disk: simplify request completion ps3disk_interrupt() always completes requests fully but it uses rq->hard_cur_sectors for FLUSH requests for some reason. Drop them and simply use __blk_end_request_all(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index d23b54b..f6586e4 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -231,7 +231,6 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) struct request *req; int res, read, error; u64 tag, status; - unsigned long num_sectors; const char *op; res = lv1_storage_get_async_status(dev->sbd.dev_id, &tag, &status); @@ -261,11 +260,9 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && req->cmd[0] == REQ_LB_OP_FLUSH) { read = 0; - num_sectors = req->hard_cur_sectors; op = "flush"; } else { read = !rq_data_dir(req); - num_sectors = req->nr_sectors; op = read ? "read" : "write"; } if (status) { @@ -281,7 +278,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) } spin_lock(&priv->lock); - __blk_end_request(req, error, num_sectors << 9); + __blk_end_request_all(req, error); priv->req = NULL; ps3disk_do_request(dev, priv->queue); spin_unlock(&priv->lock); -- cgit v0.10.2 From 044208506d35bd62396c4673176e2c12393905b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:08 +0900 Subject: sunvdc: kill vdc_end_request() vdc_end_request() is a thin silly wrapper on top of __blk_end_request(). Kill it. [ Impact: cleanup ] Signed-off-by: Tejun Heo Acked-by: David S. Miller Signed-off-by: Jens Axboe diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 5861e33..f59887c 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -212,11 +212,6 @@ static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc) vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD); } -static void vdc_end_request(struct request *req, int error, int num_sectors) -{ - __blk_end_request(req, error, num_sectors << 9); -} - static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, unsigned int index) { @@ -239,7 +234,7 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr, rqe->req = NULL; - vdc_end_request(req, (desc->status ? -EIO : 0), desc->size >> 9); + __blk_end_request(req, (desc->status ? -EIO : 0), desc->size); if (blk_queue_stopped(port->disk->queue)) blk_start_queue(port->disk->queue); @@ -453,7 +448,7 @@ static void do_vdc_request(struct request_queue *q) blkdev_dequeue_request(req); if (__send_request(req) < 0) - vdc_end_request(req, -EIO, req->hard_nr_sectors); + __blk_end_request_all(req, -EIO); } } -- cgit v0.10.2 From 4d6c84d91d1a539ebc47d1a36a35e9390ba11fdc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:09 +0900 Subject: ubd: cleanup completion path ubd had its own block request partial completion mechanism, which is unnecessary as block layer already does it. Kill ubd_end_request() and ubd_finish() and replace them with direct call to blk_end_request(). [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Jeff Dike Signed-off-by: Jens Axboe diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index f934225..36ca9fa 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -451,23 +451,6 @@ static void do_ubd_request(struct request_queue * q); /* Only changed by ubd_init, which is an initcall. */ static int thread_fd = -1; - -static void ubd_end_request(struct request *req, int bytes, int error) -{ - blk_end_request(req, error, bytes); -} - -/* Callable only from interrupt context - otherwise you need to do - * spin_lock_irq()/spin_lock_irqsave() */ -static inline void ubd_finish(struct request *req, int bytes) -{ - if(bytes < 0){ - ubd_end_request(req, 0, -EIO); - return; - } - ubd_end_request(req, bytes, 0); -} - static LIST_HEAD(restart); /* XXX - move this inside ubd_intr. */ @@ -475,7 +458,6 @@ static LIST_HEAD(restart); static void ubd_handler(void) { struct io_thread_req *req; - struct request *rq; struct ubd *ubd; struct list_head *list, *next_ele; unsigned long flags; @@ -492,10 +474,7 @@ static void ubd_handler(void) return; } - rq = req->req; - rq->nr_sectors -= req->length >> 9; - if(rq->nr_sectors == 0) - ubd_finish(rq, rq->hard_nr_sectors << 9); + blk_end_request(req->req, 0, req->length); kfree(req); } reactivate_fd(thread_fd, UBD_IRQ); -- cgit v0.10.2 From f81f2f7c9fee307e371f37424577d46f9eaf8692 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:10 +0900 Subject: ubd: drop unnecessary rq->sector manipulation ubd curiously updates rq->sector while issuing the request in multiple pieces. Don't do it and simply use local copy of sector. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Jeff Dike Signed-off-by: Jens Axboe diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 36ca9fa..4330127 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1222,7 +1222,8 @@ static void do_ubd_request(struct request_queue *q) { struct io_thread_req *io_req; struct request *req; - int n, last_sectors; + sector_t sector; + int n; while(1){ struct ubd *dev = q->queuedata; @@ -1238,11 +1239,10 @@ static void do_ubd_request(struct request_queue *q) } req = dev->request; - last_sectors = 0; + sector = req->sector; while(dev->start_sg < dev->end_sg){ struct scatterlist *sg = &dev->sg[dev->start_sg]; - req->sector += last_sectors; io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); if(io_req == NULL){ @@ -1251,10 +1251,10 @@ static void do_ubd_request(struct request_queue *q) return; } prepare_request(req, io_req, - (unsigned long long) req->sector << 9, + (unsigned long long)sector << 9, sg->offset, sg->length, sg_page(sg)); - last_sectors = sg->length >> 9; + sector += sg->length >> 9; n = os_write_file(thread_fd, &io_req, sizeof(struct io_thread_req *)); if(n != sizeof(struct io_thread_req *)){ -- cgit v0.10.2 From e091eb67af957bac4e4f7410c5d1aa263ee483a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:11 +0900 Subject: hd: clean up request completion paths hd read/write_intr() functions manually manipulate request to incrementally complete it, which block layer already supports. Simply use block layer completion routines instead of manual partial completion. While at it, clear unnecessary elv_next_request() check at the tail of read_intr(). This also makes read and write_intr() more consistent. [ Impact: cleanup ] Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 5cb300b..75b9ca9 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -452,32 +452,25 @@ static void read_intr(void) bad_rw_intr(); hd_request(); return; + ok_to_read: req = CURRENT; insw(HD_DATA, req->buffer, 256); - req->sector++; - req->buffer += 512; - req->errors = 0; - i = --req->nr_sectors; - --req->current_nr_sectors; #ifdef DEBUG printk("%s: read: sector %ld, remaining = %ld, buffer=%p\n", - req->rq_disk->disk_name, req->sector, req->nr_sectors, + req->rq_disk->disk_name, req->sector + 1, req->nr_sectors - 1, req->buffer+512); #endif - if (req->current_nr_sectors <= 0) - __blk_end_request_cur(req, 0); - if (i > 0) { + if (__blk_end_request(req, 0, 512)) { SET_HANDLER(&read_intr); return; } + (void) inb_p(HD_STATUS); #if (HD_DELAY > 0) last_req = read_timer(); #endif - if (elv_next_request(QUEUE)) - hd_request(); - return; + hd_request(); } static void write_intr(void) @@ -499,23 +492,18 @@ static void write_intr(void) bad_rw_intr(); hd_request(); return; + ok_to_write: - req->sector++; - i = --req->nr_sectors; - --req->current_nr_sectors; - req->buffer += 512; - if (!i || (req->bio && req->current_nr_sectors <= 0)) - __blk_end_request_cur(req, 0); - if (i > 0) { + if (__blk_end_request(req, 0, 512)) { SET_HANDLER(&write_intr); outsw(HD_DATA, req->buffer, 256); - } else { + return; + } + #if (HD_DELAY > 0) - last_req = read_timer(); + last_req = read_timer(); #endif - hd_request(); - } - return; + hd_request(); } static void recal_intr(void) -- cgit v0.10.2 From 467ca759fc83fc35cb7d15aec0d74c62cffc4481 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:12 +0900 Subject: swim3: clean up request completion paths swim3 curiously tries to update request parameters before calling __blk_end_request() when __blk_end_request() will do it anyway, and it updates request for partial completion manually instead of using blk_update_request(). Also, it does some spurious checks on rq such as testing whether rq->sector is negative or current_nr_sectors is zero right after fetching. Drop unnecessary stuff and use standard block layer mechanisms. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Signed-off-by: Jens Axboe diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 5904f7b..4248559 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -319,14 +319,10 @@ static void start_request(struct floppy_state *fs) req->errors, req->current_nr_sectors); #endif - if (req->sector < 0 || req->sector >= fs->total_secs) { + if (req->sector >= fs->total_secs) { __blk_end_request_cur(req, -EIO); continue; } - if (req->current_nr_sectors == 0) { - __blk_end_request_cur(req, 0); - continue; - } if (fs->ejected) { __blk_end_request_cur(req, -EIO); continue; @@ -593,8 +589,6 @@ static void xfer_timeout(unsigned long data) struct floppy_state *fs = (struct floppy_state *) data; struct swim3 __iomem *sw = fs->swim3; struct dbdma_regs __iomem *dr = fs->dma; - struct dbdma_cmd *cp = fs->dma_cmd; - unsigned long s; int n; fs->timeout_pending = 0; @@ -605,14 +599,6 @@ static void xfer_timeout(unsigned long data) out_8(&sw->intr_enable, 0); out_8(&sw->control_bic, WRITE_SECTORS | DO_ACTION); out_8(&sw->select, RELAX); - if (rq_data_dir(fd_req) == WRITE) - ++cp; - if (ld_le16(&cp->xfer_status) != 0) - s = fs->scount - ((ld_le16(&cp->res_count) + 511) >> 9); - else - s = 0; - fd_req->sector += s; - fd_req->current_nr_sectors -= s; printk(KERN_ERR "swim3: timeout %sing sector %ld\n", (rq_data_dir(fd_req)==WRITE? "writ": "read"), (long)fd_req->sector); __blk_end_request_cur(fd_req, -EIO); @@ -719,9 +705,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) if (intr & ERROR_INTR) { n = fs->scount - 1 - resid / 512; if (n > 0) { - fd_req->sector += n; - fd_req->current_nr_sectors -= n; - fd_req->buffer += n * 512; + blk_update_request(fd_req, 0, n << 9); fs->req_sector += n; } if (fs->retries < 5) { @@ -745,13 +729,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) start_request(fs); break; } - fd_req->sector += fs->scount; - fd_req->current_nr_sectors -= fs->scount; - fd_req->buffer += fs->scount * 512; - if (fd_req->current_nr_sectors <= 0) { - __blk_end_request_cur(fd_req, 0); - fs->state = idle; - } else { + if (__blk_end_request(fd_req, 0, fs->scount << 9)) { fs->req_sector += fs->scount; if (fs->req_sector > fs->secpertrack) { fs->req_sector -= fs->secpertrack; @@ -761,7 +739,8 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id) } } act(fs); - } + } else + fs->state = idle; } if (fs->state == idle) start_request(fs); -- cgit v0.10.2 From e138b4e08ef65771000fbe6d93d67e3960ff862b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:13 +0900 Subject: swim: clean up request completion paths swim curiously tries to update request parameters before calling __blk_end_request() when __blk_end_request() will do it anyway and unnecessarily checks whether current_nr_sectors is zero right after fetching. Drop unnecessary stuff and use standard block layer mechanisms. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: Laurent Vivier Signed-off-by: Jens Axboe diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 6544a7b..97ef426 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -535,10 +535,6 @@ static void redo_fd_request(struct request_queue *q) __blk_end_request_cur(req, -EIO); continue; } - if (req->current_nr_sectors == 0) { - __blk_end_request_cur(req, 0); - continue; - } if (!fs->disk_in) { __blk_end_request_cur(req, -EIO); continue; @@ -561,9 +557,6 @@ static void redo_fd_request(struct request_queue *q) __blk_end_request_cur(req, -EIO); continue; } - req->nr_sectors -= req->current_nr_sectors; - req->sector += req->current_nr_sectors; - req->buffer += req->current_nr_sectors * 512; __blk_end_request_cur(req, 0); break; } -- cgit v0.10.2 From eec9462088a26c046d4db3100796a340a50890b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:14 +0900 Subject: mg_disk: fold mg_disk.h into mg_disk.c include/linux/mg_disk.h is used only by drivers/block/mg_disk.c. No reason to put it in a separate header. Fold it into mg_disk.c. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: unsik Kim Signed-off-by: Jens Axboe diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 2c00ad9..2c6127e 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -22,10 +22,194 @@ #include #include #include -#include #define MG_RES_SEC (CONFIG_MG_DISK_RES << 1) +/* name for block device */ +#define MG_DISK_NAME "mgd" +/* name for platform device */ +#define MG_DEV_NAME "mg_disk" + +#define MG_DISK_MAJ 0 +#define MG_DISK_MAX_PART 16 +#define MG_SECTOR_SIZE 512 +#define MG_MAX_SECTS 256 + +/* Register offsets */ +#define MG_BUFF_OFFSET 0x8000 +#define MG_STORAGE_BUFFER_SIZE 0x200 +#define MG_REG_OFFSET 0xC000 +#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ +#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ +#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) +#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) +#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) +#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) +#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) +#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ +#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ +#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) +#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) + +/* "Drive Select/Head Register" bit values */ +#define MG_REG_HEAD_MUST_BE_ON 0xA0 /* These 2 bits are always on */ +#define MG_REG_HEAD_DRIVE_MASTER (0x00 | MG_REG_HEAD_MUST_BE_ON) +#define MG_REG_HEAD_DRIVE_SLAVE (0x10 | MG_REG_HEAD_MUST_BE_ON) +#define MG_REG_HEAD_LBA_MODE (0x40 | MG_REG_HEAD_MUST_BE_ON) + + +/* "Device Control Register" bit values */ +#define MG_REG_CTRL_INTR_ENABLE 0x0 +#define MG_REG_CTRL_INTR_DISABLE (0x1<<1) +#define MG_REG_CTRL_RESET (0x1<<2) +#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH 0x0 +#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW (0x1<<4) +#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW 0x0 +#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH (0x1<<5) +#define MG_REG_CTRL_DPD_DISABLE 0x0 +#define MG_REG_CTRL_DPD_ENABLE (0x1<<6) + +/* Status register bit */ +/* error bit in status register */ +#define MG_REG_STATUS_BIT_ERROR 0x01 +/* corrected error in status register */ +#define MG_REG_STATUS_BIT_CORRECTED_ERROR 0x04 +/* data request bit in status register */ +#define MG_REG_STATUS_BIT_DATA_REQ 0x08 +/* DSC - Drive Seek Complete */ +#define MG_REG_STATUS_BIT_SEEK_DONE 0x10 +/* DWF - Drive Write Fault */ +#define MG_REG_STATUS_BIT_WRITE_FAULT 0x20 +#define MG_REG_STATUS_BIT_READY 0x40 +#define MG_REG_STATUS_BIT_BUSY 0x80 + +/* handy status */ +#define MG_STAT_READY (MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE) +#define MG_READY_OK(s) (((s) & (MG_STAT_READY | \ + (MG_REG_STATUS_BIT_BUSY | \ + MG_REG_STATUS_BIT_WRITE_FAULT | \ + MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY) + +/* Error register */ +#define MG_REG_ERR_AMNF 0x01 +#define MG_REG_ERR_ABRT 0x04 +#define MG_REG_ERR_IDNF 0x10 +#define MG_REG_ERR_UNC 0x40 +#define MG_REG_ERR_BBK 0x80 + +/* error code for others */ +#define MG_ERR_NONE 0 +#define MG_ERR_TIMEOUT 0x100 +#define MG_ERR_INIT_STAT 0x101 +#define MG_ERR_TRANSLATION 0x102 +#define MG_ERR_CTRL_RST 0x103 +#define MG_ERR_INV_STAT 0x104 +#define MG_ERR_RSTOUT 0x105 + +#define MG_MAX_ERRORS 6 /* Max read/write errors */ + +/* command */ +#define MG_CMD_RD 0x20 +#define MG_CMD_WR 0x30 +#define MG_CMD_SLEEP 0x99 +#define MG_CMD_WAKEUP 0xC3 +#define MG_CMD_ID 0xEC +#define MG_CMD_WR_CONF 0x3C +#define MG_CMD_RD_CONF 0x40 + +/* operation mode */ +#define MG_OP_CASCADE (1 << 0) +#define MG_OP_CASCADE_SYNC_RD (1 << 1) +#define MG_OP_CASCADE_SYNC_WR (1 << 2) +#define MG_OP_INTERLEAVE (1 << 3) + +/* synchronous */ +#define MG_BURST_LAT_4 (3 << 4) +#define MG_BURST_LAT_5 (4 << 4) +#define MG_BURST_LAT_6 (5 << 4) +#define MG_BURST_LAT_7 (6 << 4) +#define MG_BURST_LAT_8 (7 << 4) +#define MG_BURST_LEN_4 (1 << 1) +#define MG_BURST_LEN_8 (2 << 1) +#define MG_BURST_LEN_16 (3 << 1) +#define MG_BURST_LEN_32 (4 << 1) +#define MG_BURST_LEN_CONT (0 << 1) + +/* timeout value (unit: ms) */ +#define MG_TMAX_CONF_TO_CMD 1 +#define MG_TMAX_WAIT_RD_DRQ 10 +#define MG_TMAX_WAIT_WR_DRQ 500 +#define MG_TMAX_RST_TO_BUSY 10 +#define MG_TMAX_HDRST_TO_RDY 500 +#define MG_TMAX_SWRST_TO_RDY 500 +#define MG_TMAX_RSTOUT 3000 + +/* device attribution */ +/* use mflash as boot device */ +#define MG_BOOT_DEV (1 << 0) +/* use mflash as storage device */ +#define MG_STORAGE_DEV (1 << 1) +/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ +#define MG_STORAGE_DEV_SKIP_RST (1 << 2) + +#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) + +/* names of GPIO resource */ +#define MG_RST_PIN "mg_rst" +/* except MG_BOOT_DEV, reset-out pin should be assigned */ +#define MG_RSTOUT_PIN "mg_rstout" + +/* private driver data */ +struct mg_drv_data { + /* disk resource */ + u32 use_polling; + + /* device attribution */ + u32 dev_attr; + + /* internally used */ + struct mg_host *host; +}; + +/* main structure for mflash driver */ +struct mg_host { + struct device *dev; + + struct request_queue *breq; + spinlock_t lock; + struct gendisk *gd; + + struct timer_list timer; + void (*mg_do_intr) (struct mg_host *); + + u16 id[ATA_ID_WORDS]; + + u16 cyls; + u16 heads; + u16 sectors; + u32 n_sectors; + u32 nres_sectors; + + void __iomem *dev_base; + unsigned int irq; + unsigned int rst; + unsigned int rstout; + + u32 major; + u32 error; +}; + +/* + * Debugging macro and defines + */ +#undef DO_MG_DEBUG +#ifdef DO_MG_DEBUG +# define MG_DBG(fmt, args...) \ + printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) +#else /* CONFIG_MG_DEBUG */ +# define MG_DBG(fmt, args...) do { } while (0) +#endif /* CONFIG_MG_DEBUG */ + static void mg_request(struct request_queue *); static void mg_dump_status(const char *msg, unsigned int stat, diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h deleted file mode 100644 index 1f76b1e..0000000 --- a/include/linux/mg_disk.h +++ /dev/null @@ -1,206 +0,0 @@ -/* - * include/linux/mg_disk.c - * - * Support for the mGine m[g]flash IO mode. - * Based on legacy hd.c - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MG_DISK_H__ -#define __MG_DISK_H__ - -#include -#include - -/* name for block device */ -#define MG_DISK_NAME "mgd" -/* name for platform device */ -#define MG_DEV_NAME "mg_disk" - -#define MG_DISK_MAJ 0 -#define MG_DISK_MAX_PART 16 -#define MG_SECTOR_SIZE 512 -#define MG_MAX_SECTS 256 - -/* Register offsets */ -#define MG_BUFF_OFFSET 0x8000 -#define MG_STORAGE_BUFFER_SIZE 0x200 -#define MG_REG_OFFSET 0xC000 -#define MG_REG_FEATURE (MG_REG_OFFSET + 2) /* write case */ -#define MG_REG_ERROR (MG_REG_OFFSET + 2) /* read case */ -#define MG_REG_SECT_CNT (MG_REG_OFFSET + 4) -#define MG_REG_SECT_NUM (MG_REG_OFFSET + 6) -#define MG_REG_CYL_LOW (MG_REG_OFFSET + 8) -#define MG_REG_CYL_HIGH (MG_REG_OFFSET + 0xA) -#define MG_REG_DRV_HEAD (MG_REG_OFFSET + 0xC) -#define MG_REG_COMMAND (MG_REG_OFFSET + 0xE) /* write case */ -#define MG_REG_STATUS (MG_REG_OFFSET + 0xE) /* read case */ -#define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) -#define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) - -/* "Drive Select/Head Register" bit values */ -#define MG_REG_HEAD_MUST_BE_ON 0xA0 /* These 2 bits are always on */ -#define MG_REG_HEAD_DRIVE_MASTER (0x00 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_DRIVE_SLAVE (0x10 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_LBA_MODE (0x40 | MG_REG_HEAD_MUST_BE_ON) - - -/* "Device Control Register" bit values */ -#define MG_REG_CTRL_INTR_ENABLE 0x0 -#define MG_REG_CTRL_INTR_DISABLE (0x1<<1) -#define MG_REG_CTRL_RESET (0x1<<2) -#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH 0x0 -#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW (0x1<<4) -#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW 0x0 -#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH (0x1<<5) -#define MG_REG_CTRL_DPD_DISABLE 0x0 -#define MG_REG_CTRL_DPD_ENABLE (0x1<<6) - -/* Status register bit */ -/* error bit in status register */ -#define MG_REG_STATUS_BIT_ERROR 0x01 -/* corrected error in status register */ -#define MG_REG_STATUS_BIT_CORRECTED_ERROR 0x04 -/* data request bit in status register */ -#define MG_REG_STATUS_BIT_DATA_REQ 0x08 -/* DSC - Drive Seek Complete */ -#define MG_REG_STATUS_BIT_SEEK_DONE 0x10 -/* DWF - Drive Write Fault */ -#define MG_REG_STATUS_BIT_WRITE_FAULT 0x20 -#define MG_REG_STATUS_BIT_READY 0x40 -#define MG_REG_STATUS_BIT_BUSY 0x80 - -/* handy status */ -#define MG_STAT_READY (MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE) -#define MG_READY_OK(s) (((s) & (MG_STAT_READY | \ - (MG_REG_STATUS_BIT_BUSY | \ - MG_REG_STATUS_BIT_WRITE_FAULT | \ - MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY) - -/* Error register */ -#define MG_REG_ERR_AMNF 0x01 -#define MG_REG_ERR_ABRT 0x04 -#define MG_REG_ERR_IDNF 0x10 -#define MG_REG_ERR_UNC 0x40 -#define MG_REG_ERR_BBK 0x80 - -/* error code for others */ -#define MG_ERR_NONE 0 -#define MG_ERR_TIMEOUT 0x100 -#define MG_ERR_INIT_STAT 0x101 -#define MG_ERR_TRANSLATION 0x102 -#define MG_ERR_CTRL_RST 0x103 -#define MG_ERR_INV_STAT 0x104 -#define MG_ERR_RSTOUT 0x105 - -#define MG_MAX_ERRORS 6 /* Max read/write errors */ - -/* command */ -#define MG_CMD_RD 0x20 -#define MG_CMD_WR 0x30 -#define MG_CMD_SLEEP 0x99 -#define MG_CMD_WAKEUP 0xC3 -#define MG_CMD_ID 0xEC -#define MG_CMD_WR_CONF 0x3C -#define MG_CMD_RD_CONF 0x40 - -/* operation mode */ -#define MG_OP_CASCADE (1 << 0) -#define MG_OP_CASCADE_SYNC_RD (1 << 1) -#define MG_OP_CASCADE_SYNC_WR (1 << 2) -#define MG_OP_INTERLEAVE (1 << 3) - -/* synchronous */ -#define MG_BURST_LAT_4 (3 << 4) -#define MG_BURST_LAT_5 (4 << 4) -#define MG_BURST_LAT_6 (5 << 4) -#define MG_BURST_LAT_7 (6 << 4) -#define MG_BURST_LAT_8 (7 << 4) -#define MG_BURST_LEN_4 (1 << 1) -#define MG_BURST_LEN_8 (2 << 1) -#define MG_BURST_LEN_16 (3 << 1) -#define MG_BURST_LEN_32 (4 << 1) -#define MG_BURST_LEN_CONT (0 << 1) - -/* timeout value (unit: ms) */ -#define MG_TMAX_CONF_TO_CMD 1 -#define MG_TMAX_WAIT_RD_DRQ 10 -#define MG_TMAX_WAIT_WR_DRQ 500 -#define MG_TMAX_RST_TO_BUSY 10 -#define MG_TMAX_HDRST_TO_RDY 500 -#define MG_TMAX_SWRST_TO_RDY 500 -#define MG_TMAX_RSTOUT 3000 - -/* device attribution */ -/* use mflash as boot device */ -#define MG_BOOT_DEV (1 << 0) -/* use mflash as storage device */ -#define MG_STORAGE_DEV (1 << 1) -/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ -#define MG_STORAGE_DEV_SKIP_RST (1 << 2) - -#define MG_DEV_MASK (MG_BOOT_DEV | MG_STORAGE_DEV | MG_STORAGE_DEV_SKIP_RST) - -/* names of GPIO resource */ -#define MG_RST_PIN "mg_rst" -/* except MG_BOOT_DEV, reset-out pin should be assigned */ -#define MG_RSTOUT_PIN "mg_rstout" - -/* private driver data */ -struct mg_drv_data { - /* disk resource */ - u32 use_polling; - - /* device attribution */ - u32 dev_attr; - - /* internally used */ - struct mg_host *host; -}; - -/* main structure for mflash driver */ -struct mg_host { - struct device *dev; - - struct request_queue *breq; - spinlock_t lock; - struct gendisk *gd; - - struct timer_list timer; - void (*mg_do_intr) (struct mg_host *); - - u16 id[ATA_ID_WORDS]; - - u16 cyls; - u16 heads; - u16 sectors; - u32 n_sectors; - u32 nres_sectors; - - void __iomem *dev_base; - unsigned int irq; - unsigned int rst; - unsigned int rstout; - - u32 major; - u32 error; -}; - -/* - * Debugging macro and defines - */ -#undef DO_MG_DEBUG -#ifdef DO_MG_DEBUG -# define MG_DBG(fmt, args...) \ - printk(KERN_DEBUG "%s:%d "fmt, __func__, __LINE__, ##args) -#else /* CONFIG_MG_DEBUG */ -# define MG_DBG(fmt, args...) do { } while (0) -#endif /* CONFIG_MG_DEBUG */ - -#endif -- cgit v0.10.2 From a03bb5a32fff4aa23f081a3cff7e98d4084104cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 28 Apr 2009 13:06:15 +0900 Subject: mg_disk: clean up request completion paths mg_disk implements its own partial completion. Convert to standard block layer partial completion. [ Impact: cleanup ] Signed-off-by: Tejun Heo Cc: unsik Kim Signed-off-by: Jens Axboe diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 2c6127e..1a4cc96 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -503,95 +503,68 @@ static unsigned int mg_out(struct mg_host *host, static void mg_read(struct request *req) { - u32 remains, j; + u32 j; struct mg_host *host = req->rq_disk->private_data; - remains = req->nr_sectors; - if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_RD, NULL) != MG_ERR_NONE) mg_bad_rw_intr(host); MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - remains, req->sector, req->buffer); + req->nr_sectors, req->sector, req->buffer); + + do { + u16 *buff = (u16 *)req->buffer; - while (remains) { if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } - for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) { - *(u16 *)req->buffer = - inw((unsigned long)host->dev_base + - MG_BUFF_OFFSET + (j << 1)); - req->buffer += 2; - } - - req->sector++; - req->errors = 0; - remains = --req->nr_sectors; - --req->current_nr_sectors; - - if (req->current_nr_sectors <= 0) { - MG_DBG("remain : %d sects\n", remains); - __blk_end_request_cur(req, 0); - if (remains > 0) - req = elv_next_request(host->breq); - } + for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) + *buff++ = inw((unsigned long)host->dev_base + + MG_BUFF_OFFSET + (j << 1)); outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - } + } while (__blk_end_request(req, 0, MG_SECTOR_SIZE)); } static void mg_write(struct request *req) { - u32 remains, j; + u32 j; struct mg_host *host = req->rq_disk->private_data; - remains = req->nr_sectors; - if (mg_out(host, req->sector, req->nr_sectors, MG_CMD_WR, NULL) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } - MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", - remains, req->sector, req->buffer); - while (remains) { + req->nr_sectors, req->sector, req->buffer); + + do { + u16 *buff = (u16 *)req->buffer; + if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } - for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) { - outw(*(u16 *)req->buffer, - (unsigned long)host->dev_base + - MG_BUFF_OFFSET + (j << 1)); - req->buffer += 2; - } - req->sector++; - remains = --req->nr_sectors; - --req->current_nr_sectors; - - if (req->current_nr_sectors <= 0) { - MG_DBG("remain : %d sects\n", remains); - __blk_end_request_cur(req, 0); - if (remains > 0) - req = elv_next_request(host->breq); - } + for (j = 0; j < MG_SECTOR_SIZE >> 1; j++) + outw(*buff++, (unsigned long)host->dev_base + + MG_BUFF_OFFSET + (j << 1)); outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - } + } while (__blk_end_request(req, 0, MG_SECTOR_SIZE)); } static void mg_read_intr(struct mg_host *host) { u32 i; + u16 *buff; struct request *req; /* check status */ @@ -612,39 +585,24 @@ static void mg_read_intr(struct mg_host *host) ok_to_read: /* get current segment of request */ req = elv_next_request(host->breq); + buff = (u16 *)req->buffer; /* read 1 sector */ - for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) { - *(u16 *)req->buffer = - inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + - (i << 1)); - req->buffer += 2; - } + for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) + *buff++ = inw((unsigned long)host->dev_base + MG_BUFF_OFFSET + + (i << 1)); - /* manipulate request */ MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", req->sector, req->nr_sectors - 1, req->buffer); - req->sector++; - req->errors = 0; - i = --req->nr_sectors; - --req->current_nr_sectors; - - /* let know if current segment done */ - if (req->current_nr_sectors <= 0) - __blk_end_request_cur(req, 0); - - /* set handler if read remains */ - if (i > 0) { - host->mg_do_intr = mg_read_intr; - mod_timer(&host->timer, jiffies + 3 * HZ); - } - /* send read confirm */ outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - /* goto next request */ - if (!i) + if (__blk_end_request(req, 0, MG_SECTOR_SIZE)) { + /* set handler if read remains */ + host->mg_do_intr = mg_read_intr; + mod_timer(&host->timer, jiffies + 3 * HZ); + } else /* goto next request */ mg_request(host->breq); } @@ -653,6 +611,7 @@ static void mg_write_intr(struct mg_host *host) u32 i, j; u16 *buff; struct request *req; + bool rem; /* get current segment of request */ req = elv_next_request(host->breq); @@ -673,18 +632,8 @@ static void mg_write_intr(struct mg_host *host) return; ok_to_write: - /* manipulate request */ - req->sector++; - i = --req->nr_sectors; - --req->current_nr_sectors; - req->buffer += MG_SECTOR_SIZE; - - /* let know if current segment or all done */ - if (!i || (req->bio && req->current_nr_sectors <= 0)) - __blk_end_request_cur(req, 0); - - /* write 1 sector and set handler if remains */ - if (i > 0) { + if ((rem = __blk_end_request(req, 0, MG_SECTOR_SIZE))) { + /* write 1 sector and set handler if remains */ buff = (u16 *)req->buffer; for (j = 0; j < MG_STORAGE_BUFFER_SIZE >> 1; j++) { outw(*buff, (unsigned long)host->dev_base + @@ -700,7 +649,7 @@ ok_to_write: /* send write confirm */ outb(MG_CMD_WR_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); - if (!i) + if (!rem) mg_request(host->breq); } -- cgit v0.10.2 From 8a11a789c39cd6f61de4243234cf44a46c23ffd0 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Apr 2009 13:06:16 +0900 Subject: mg_disk: fix dependency on libata Add local copies of ata_id_string() and ata_id_c_string() to mg_disk so there is no need for the driver to depend on ATA and SCSI. [ Impact: break dependency on libata by copying ata id string functions ] Cc: unsik Kim Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ddea8e4..f42fa50 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -412,7 +412,7 @@ config ATA_OVER_ETH config MG_DISK tristate "mGine mflash, gflash support" - depends on ARM && ATA && GPIOLIB + depends on ARM && GPIOLIB help mGine mFlash(gFlash) block device driver diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 1a4cc96..8e53cae 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -357,6 +357,42 @@ static irqreturn_t mg_irq(int irq, void *dev_id) return IRQ_HANDLED; } +/* local copy of ata_id_string() */ +static void mg_id_string(const u16 *id, unsigned char *s, + unsigned int ofs, unsigned int len) +{ + unsigned int c; + + BUG_ON(len & 1); + + while (len > 0) { + c = id[ofs] >> 8; + *s = c; + s++; + + c = id[ofs] & 0xff; + *s = c; + s++; + + ofs++; + len -= 2; + } +} + +/* local copy of ata_id_c_string() */ +static void mg_id_c_string(const u16 *id, unsigned char *s, + unsigned int ofs, unsigned int len) +{ + unsigned char *p; + + mg_id_string(id, s, ofs, len - 1); + + p = s + strnlen(s, len - 1); + while (p > s && p[-1] == ' ') + p--; + *p = '\0'; +} + static int mg_get_disk_id(struct mg_host *host) { u32 i; @@ -403,9 +439,9 @@ static int mg_get_disk_id(struct mg_host *host) host->n_sectors -= host->nres_sectors; } - ata_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev)); - ata_id_c_string(id, model, ATA_ID_PROD, sizeof(model)); - ata_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial)); + mg_id_c_string(id, fwrev, ATA_ID_FW_REV, sizeof(fwrev)); + mg_id_c_string(id, model, ATA_ID_PROD, sizeof(model)); + mg_id_c_string(id, serial, ATA_ID_SERNO, sizeof(serial)); printk(KERN_INFO "mg_disk: model: %s\n", model); printk(KERN_INFO "mg_disk: firm: %.8s\n", fwrev); printk(KERN_INFO "mg_disk: serial: %s\n", serial); -- cgit v0.10.2 From f68adec3c7155a8bbf32a90cb4c4d0737df045d9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Apr 2009 13:06:17 +0900 Subject: mg_disk: use defines from While at it: - remove MG_REG_HEAD_MUST_BE_ON define - remove MG_REG_CTRL_INTR_ENABLE define - remove MG_REG_HEAD_LBA_MODE define - remove unused defines Cc: unsik Kim Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index 8e53cae..71e56cc 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c @@ -51,51 +51,10 @@ #define MG_REG_DRV_CTRL (MG_REG_OFFSET + 0x10) #define MG_REG_BURST_CTRL (MG_REG_OFFSET + 0x12) -/* "Drive Select/Head Register" bit values */ -#define MG_REG_HEAD_MUST_BE_ON 0xA0 /* These 2 bits are always on */ -#define MG_REG_HEAD_DRIVE_MASTER (0x00 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_DRIVE_SLAVE (0x10 | MG_REG_HEAD_MUST_BE_ON) -#define MG_REG_HEAD_LBA_MODE (0x40 | MG_REG_HEAD_MUST_BE_ON) - - -/* "Device Control Register" bit values */ -#define MG_REG_CTRL_INTR_ENABLE 0x0 -#define MG_REG_CTRL_INTR_DISABLE (0x1<<1) -#define MG_REG_CTRL_RESET (0x1<<2) -#define MG_REG_CTRL_INTR_POLA_ACTIVE_HIGH 0x0 -#define MG_REG_CTRL_INTR_POLA_ACTIVE_LOW (0x1<<4) -#define MG_REG_CTRL_DPD_POLA_ACTIVE_LOW 0x0 -#define MG_REG_CTRL_DPD_POLA_ACTIVE_HIGH (0x1<<5) -#define MG_REG_CTRL_DPD_DISABLE 0x0 -#define MG_REG_CTRL_DPD_ENABLE (0x1<<6) - -/* Status register bit */ -/* error bit in status register */ -#define MG_REG_STATUS_BIT_ERROR 0x01 -/* corrected error in status register */ -#define MG_REG_STATUS_BIT_CORRECTED_ERROR 0x04 -/* data request bit in status register */ -#define MG_REG_STATUS_BIT_DATA_REQ 0x08 -/* DSC - Drive Seek Complete */ -#define MG_REG_STATUS_BIT_SEEK_DONE 0x10 -/* DWF - Drive Write Fault */ -#define MG_REG_STATUS_BIT_WRITE_FAULT 0x20 -#define MG_REG_STATUS_BIT_READY 0x40 -#define MG_REG_STATUS_BIT_BUSY 0x80 - /* handy status */ -#define MG_STAT_READY (MG_REG_STATUS_BIT_READY | MG_REG_STATUS_BIT_SEEK_DONE) -#define MG_READY_OK(s) (((s) & (MG_STAT_READY | \ - (MG_REG_STATUS_BIT_BUSY | \ - MG_REG_STATUS_BIT_WRITE_FAULT | \ - MG_REG_STATUS_BIT_ERROR))) == MG_STAT_READY) - -/* Error register */ -#define MG_REG_ERR_AMNF 0x01 -#define MG_REG_ERR_ABRT 0x04 -#define MG_REG_ERR_IDNF 0x10 -#define MG_REG_ERR_UNC 0x40 -#define MG_REG_ERR_BBK 0x80 +#define MG_STAT_READY (ATA_DRDY | ATA_DSC) +#define MG_READY_OK(s) (((s) & (MG_STAT_READY | (ATA_BUSY | ATA_DF | \ + ATA_ERR))) == MG_STAT_READY) /* error code for others */ #define MG_ERR_NONE 0 @@ -225,41 +184,39 @@ static void mg_dump_status(const char *msg, unsigned int stat, } printk(KERN_ERR "%s: %s: status=0x%02x { ", name, msg, stat & 0xff); - if (stat & MG_REG_STATUS_BIT_BUSY) + if (stat & ATA_BUSY) printk("Busy "); - if (stat & MG_REG_STATUS_BIT_READY) + if (stat & ATA_DRDY) printk("DriveReady "); - if (stat & MG_REG_STATUS_BIT_WRITE_FAULT) + if (stat & ATA_DF) printk("WriteFault "); - if (stat & MG_REG_STATUS_BIT_SEEK_DONE) + if (stat & ATA_DSC) printk("SeekComplete "); - if (stat & MG_REG_STATUS_BIT_DATA_REQ) + if (stat & ATA_DRQ) printk("DataRequest "); - if (stat & MG_REG_STATUS_BIT_CORRECTED_ERROR) + if (stat & ATA_CORR) printk("CorrectedError "); - if (stat & MG_REG_STATUS_BIT_ERROR) + if (stat & ATA_ERR) printk("Error "); printk("}\n"); - if ((stat & MG_REG_STATUS_BIT_ERROR) == 0) { + if ((stat & ATA_ERR) == 0) { host->error = 0; } else { host->error = inb((unsigned long)host->dev_base + MG_REG_ERROR); printk(KERN_ERR "%s: %s: error=0x%02x { ", name, msg, host->error & 0xff); - if (host->error & MG_REG_ERR_BBK) + if (host->error & ATA_BBK) printk("BadSector "); - if (host->error & MG_REG_ERR_UNC) + if (host->error & ATA_UNC) printk("UncorrectableError "); - if (host->error & MG_REG_ERR_IDNF) + if (host->error & ATA_IDNF) printk("SectorIdNotFound "); - if (host->error & MG_REG_ERR_ABRT) + if (host->error & ATA_ABORTED) printk("DriveStatusError "); - if (host->error & MG_REG_ERR_AMNF) + if (host->error & ATA_AMNF) printk("AddrMarkNotFound "); printk("}"); - if (host->error & - (MG_REG_ERR_BBK | MG_REG_ERR_UNC | - MG_REG_ERR_IDNF | MG_REG_ERR_AMNF)) { + if (host->error & (ATA_BBK | ATA_UNC | ATA_IDNF | ATA_AMNF)) { if (host->breq) { req = elv_next_request(host->breq); if (req) @@ -284,12 +241,12 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) do { cur_jiffies = jiffies; - if (status & MG_REG_STATUS_BIT_BUSY) { - if (expect == MG_REG_STATUS_BIT_BUSY) + if (status & ATA_BUSY) { + if (expect == ATA_BUSY) break; } else { /* Check the error condition! */ - if (status & MG_REG_STATUS_BIT_ERROR) { + if (status & ATA_ERR) { mg_dump_status("mg_wait", status, host); break; } @@ -298,8 +255,8 @@ static unsigned int mg_wait(struct mg_host *host, u32 expect, u32 msec) if (MG_READY_OK(status)) break; - if (expect == MG_REG_STATUS_BIT_DATA_REQ) - if (status & MG_REG_STATUS_BIT_DATA_REQ) + if (expect == ATA_DRQ) + if (status & ATA_DRQ) break; } if (!msec) { @@ -404,12 +361,10 @@ static int mg_get_disk_id(struct mg_host *host) char serial[ATA_ID_SERNO_LEN + 1]; if (!prv_data->use_polling) - outb(MG_REG_CTRL_INTR_DISABLE, - (unsigned long)host->dev_base + - MG_REG_DRV_CTRL); + outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); outb(MG_CMD_ID, (unsigned long)host->dev_base + MG_REG_COMMAND); - err = mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_RD_DRQ); + err = mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_RD_DRQ); if (err) return err; @@ -449,8 +404,7 @@ static int mg_get_disk_id(struct mg_host *host) host->n_sectors, host->nres_sectors); if (!prv_data->use_polling) - outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base + - MG_REG_DRV_CTRL); + outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); return err; } @@ -464,7 +418,7 @@ static int mg_disk_init(struct mg_host *host) /* hdd rst low */ gpio_set_value(host->rst, 0); - err = mg_wait(host, MG_REG_STATUS_BIT_BUSY, MG_TMAX_RST_TO_BUSY); + err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY); if (err) return err; @@ -475,17 +429,14 @@ static int mg_disk_init(struct mg_host *host) return err; /* soft reset on */ - outb(MG_REG_CTRL_RESET | - (prv_data->use_polling ? MG_REG_CTRL_INTR_DISABLE : - MG_REG_CTRL_INTR_ENABLE), + outb(ATA_SRST | (prv_data->use_polling ? ATA_NIEN : 0), (unsigned long)host->dev_base + MG_REG_DRV_CTRL); - err = mg_wait(host, MG_REG_STATUS_BIT_BUSY, MG_TMAX_RST_TO_BUSY); + err = mg_wait(host, ATA_BUSY, MG_TMAX_RST_TO_BUSY); if (err) return err; /* soft reset off */ - outb(prv_data->use_polling ? MG_REG_CTRL_INTR_DISABLE : - MG_REG_CTRL_INTR_ENABLE, + outb(prv_data->use_polling ? ATA_NIEN : 0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); err = mg_wait(host, MG_STAT_READY, MG_TMAX_SWRST_TO_RDY); if (err) @@ -531,7 +482,7 @@ static unsigned int mg_out(struct mg_host *host, MG_REG_CYL_LOW); outb((u8)(sect_num >> 16), (unsigned long)host->dev_base + MG_REG_CYL_HIGH); - outb((u8)((sect_num >> 24) | MG_REG_HEAD_LBA_MODE), + outb((u8)((sect_num >> 24) | ATA_LBA | ATA_DEVICE_OBS), (unsigned long)host->dev_base + MG_REG_DRV_HEAD); outb(cmd, (unsigned long)host->dev_base + MG_REG_COMMAND); return MG_ERR_NONE; @@ -552,8 +503,8 @@ static void mg_read(struct request *req) do { u16 *buff = (u16 *)req->buffer; - if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, - MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { + if (mg_wait(host, ATA_DRQ, + MG_TMAX_WAIT_RD_DRQ) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } @@ -583,8 +534,7 @@ static void mg_write(struct request *req) do { u16 *buff = (u16 *)req->buffer; - if (mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, - MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { + if (mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { mg_bad_rw_intr(host); return; } @@ -606,11 +556,11 @@ static void mg_read_intr(struct mg_host *host) /* check status */ do { i = inb((unsigned long)host->dev_base + MG_REG_STATUS); - if (i & MG_REG_STATUS_BIT_BUSY) + if (i & ATA_BUSY) break; if (!MG_READY_OK(i)) break; - if (i & MG_REG_STATUS_BIT_DATA_REQ) + if (i & ATA_DRQ) goto ok_to_read; } while (0); mg_dump_status("mg_read_intr", i, host); @@ -655,11 +605,11 @@ static void mg_write_intr(struct mg_host *host) /* check status */ do { i = inb((unsigned long)host->dev_base + MG_REG_STATUS); - if (i & MG_REG_STATUS_BIT_BUSY) + if (i & ATA_BUSY) break; if (!MG_READY_OK(i)) break; - if ((req->nr_sectors <= 1) || (i & MG_REG_STATUS_BIT_DATA_REQ)) + if ((req->nr_sectors <= 1) || (i & ATA_DRQ)) goto ok_to_write; } while (0); mg_dump_status("mg_write_intr", i, host); @@ -752,18 +702,15 @@ static unsigned int mg_issue_req(struct request *req, break; case WRITE: /* TODO : handler */ - outb(MG_REG_CTRL_INTR_DISABLE, - (unsigned long)host->dev_base + - MG_REG_DRV_CTRL); + outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); if (mg_out(host, sect_num, sect_cnt, MG_CMD_WR, &mg_write_intr) != MG_ERR_NONE) { mg_bad_rw_intr(host); return host->error; } del_timer(&host->timer); - mg_wait(host, MG_REG_STATUS_BIT_DATA_REQ, MG_TMAX_WAIT_WR_DRQ); - outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base + - MG_REG_DRV_CTRL); + mg_wait(host, ATA_DRQ, MG_TMAX_WAIT_WR_DRQ); + outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); if (host->error) { mg_bad_rw_intr(host); return host->error; @@ -849,9 +796,7 @@ static int mg_suspend(struct platform_device *plat_dev, pm_message_t state) return -EIO; if (!prv_data->use_polling) - outb(MG_REG_CTRL_INTR_DISABLE, - (unsigned long)host->dev_base + - MG_REG_DRV_CTRL); + outb(ATA_NIEN, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); outb(MG_CMD_SLEEP, (unsigned long)host->dev_base + MG_REG_COMMAND); /* wait until mflash deep sleep */ @@ -859,9 +804,7 @@ static int mg_suspend(struct platform_device *plat_dev, pm_message_t state) if (mg_wait(host, MG_STAT_READY, MG_TMAX_CONF_TO_CMD)) { if (!prv_data->use_polling) - outb(MG_REG_CTRL_INTR_ENABLE, - (unsigned long)host->dev_base + - MG_REG_DRV_CTRL); + outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); return -EIO; } @@ -884,8 +827,7 @@ static int mg_resume(struct platform_device *plat_dev) return -EIO; if (!prv_data->use_polling) - outb(MG_REG_CTRL_INTR_ENABLE, (unsigned long)host->dev_base + - MG_REG_DRV_CTRL); + outb(0, (unsigned long)host->dev_base + MG_REG_DRV_CTRL); return 0; } -- cgit v0.10.2 From 5b644c7a218702668d7b610994e7dcbc3d4705d3 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 28 Apr 2009 08:17:54 +0000 Subject: clocksource: improve sh_cmt clocksource overflow handling This patch improves the sh_cmt clocksource handling. Currently the counter value is ignored in the case of overflow. With this patch the overflow flag is read before and after reading the counter, removing any counter value and overflow flag mismatch issues. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index c247564..d607ac2 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -111,16 +111,21 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_priv *p, int *has_wrapped) { unsigned long v1, v2, v3; + int o1, o2; + + o1 = sh_cmt_read(p, CMCSR) & p->overflow_bit; /* Make sure the timer value is stable. Stolen from acpi_pm.c */ do { + o2 = o1; v1 = sh_cmt_read(p, CMCNT); v2 = sh_cmt_read(p, CMCNT); v3 = sh_cmt_read(p, CMCNT); - } while (unlikely((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2))); + o1 = sh_cmt_read(p, CMCSR) & p->overflow_bit; + } while (unlikely((o1 != o2) || (v1 > v2 && v1 < v3) + || (v2 > v3 && v2 < v1) || (v3 > v1 && v3 < v2))); - *has_wrapped = sh_cmt_read(p, CMCSR) & p->overflow_bit; + *has_wrapped = o1; return v2; } @@ -394,7 +399,7 @@ static cycle_t sh_cmt_clocksource_read(struct clocksource *cs) raw = sh_cmt_get_counter(p, &has_wrapped); if (unlikely(has_wrapped)) - raw = p->match_value; + raw += p->match_value; spin_unlock_irqrestore(&p->lock, flags); return value + raw; -- cgit v0.10.2 From 8e0b842948156e3463879caed12b4ce51bed772e Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 28 Apr 2009 08:19:50 +0000 Subject: sh: setup timers in late_time_init() This patch moves the SuperH timer setup code from time_init() to late_time_init(). Good things about this change: - interrupts: they are enabled at late_time_init() - mm: regular kmalloc() can be used at late_time_init() Together with moving to late_time_init() this patch changes the sh_cmt driver to always allocate with kmalloc(). This simplifies the code a bit and also fixes section mismatches. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c index 109409f..a2458bf 100644 --- a/arch/sh/kernel/time_32.c +++ b/arch/sh/kernel/time_32.c @@ -237,21 +237,8 @@ unsigned long long sched_clock(void) } #endif -void __init time_init(void) +static void __init sh_late_time_init(void) { - if (board_time_init) - board_time_init(); - - clk_init(); - - rtc_sh_get_time(&xtime); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); - -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST - local_timer_setup(smp_processor_id()); -#endif - /* * Make sure all compiled-in early timers register themselves. * Run probe() for one "earlytimer" device. @@ -270,3 +257,22 @@ void __init time_init(void) printk(KERN_INFO "Using %s for system timer\n", sys_timer->name); } + +void __init time_init(void) +{ + if (board_time_init) + board_time_init(); + + clk_init(); + + rtc_sh_get_time(&xtime); + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + local_timer_setup(smp_processor_id()); +#endif + + late_time_init = sh_late_time_init; +} + diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index d607ac2..bf3e4c1 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -18,7 +18,6 @@ */ #include -#include #include #include #include @@ -645,11 +644,7 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) return 0; } - if (is_early_platform_device(pdev)) - p = alloc_bootmem(sizeof(*p)); - else - p = kmalloc(sizeof(*p), GFP_KERNEL); - + p = kmalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) { dev_err(&pdev->dev, "failed to allocate driver data\n"); return -ENOMEM; @@ -657,11 +652,7 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) ret = sh_cmt_setup(p, pdev); if (ret) { - if (is_early_platform_device(pdev)) - free_bootmem(__pa(p), sizeof(*p)); - else - kfree(p); - + kfree(p); platform_set_drvdata(pdev, NULL); } return ret; -- cgit v0.10.2 From 9ec4fa271faf2db3b8e1419c998da1ca6b094eb6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:57:18 -0700 Subject: irq, cpumask: correct CPUMASKS_OFFSTACK typo and fix fallout CPUMASKS_OFFSTACK is not defined anywhere (it is CPUMASK_OFFSTACK). It is a typo and init_allocate_desc_masks() is called before it set affinity to all cpus... Split init_alloc_desc_masks() into all_desc_masks() and init_desc_masks(). Also use CPUMASK_OFFSTACK in alloc_desc_masks(). [ Impact: fix smp_affinity copying/setup when moving irq_desc between CPUs ] Signed-off-by: Yinghai Lu Acked-by: Rusty Russell Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" LKML-Reference: <49F6546E.3040406@kernel.org> Signed-off-by: Ingo Molnar diff --git a/include/linux/irq.h b/include/linux/irq.h index b7cbeed..c4953cf 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -424,27 +424,25 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #ifdef CONFIG_SMP /** - * init_alloc_desc_masks - allocate cpumasks for irq_desc + * alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). - * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { +#ifdef CONFIG_CPUMASK_OFFSTACK int node; if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ alloc_bootmem_cpumask_var(&desc->pending_mask); - cpumask_clear(desc->pending_mask); #endif return true; } @@ -453,18 +451,25 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; - cpumask_setall(desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { free_cpumask_var(desc->affinity); return false; } - cpumask_clear(desc->pending_mask); +#endif #endif return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ + cpumask_setall(desc->affinity); +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_clear(desc->pending_mask); +#endif +} + /** * init_copy_desc_masks - copy cpumasks for irq_desc * @old_desc: pointer to old irq_desc struct @@ -478,7 +483,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { -#ifdef CONFIG_CPUMASKS_OFFSTACK +#ifdef CONFIG_CPUMASK_OFFSTACK cpumask_copy(new_desc->affinity, old_desc->affinity); #ifdef CONFIG_GENERIC_PENDING_IRQ @@ -499,12 +504,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; } +static inline void init_desc_masks(struct irq_desc *desc) +{ +} + static inline void init_copy_desc_masks(struct irq_desc *old_desc, struct irq_desc *new_desc) { diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index d82142b..882c798 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -115,10 +115,11 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!init_alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, cpu, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } + init_desc_masks(desc); arch_init_chip_data(desc, cpu); } @@ -169,7 +170,8 @@ int __init early_irq_init(void) desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - init_alloc_desc_masks(&desc[i], 0, true); + alloc_desc_masks(&desc[i], 0, true); + init_desc_masks(&desc[i]); irq_desc_ptrs[i] = desc + i; } @@ -256,7 +258,8 @@ int __init early_irq_init(void) for (i = 0; i < count; i++) { desc[i].irq = i; - init_alloc_desc_masks(&desc[i], 0, true); + alloc_desc_masks(&desc[i], 0, true); + init_desc_masks(&desc[i]); desc[i].kstat_irqs = kstat_irqs_all[i]; } return arch_early_irq_init(); diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 44bbdcb..5760d72 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -37,7 +37,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, struct irq_desc *desc, int cpu) { memcpy(desc, old_desc, sizeof(struct irq_desc)); - if (!init_alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, cpu, false)) { printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " "for migration.\n", irq); return false; -- cgit v0.10.2 From fcef5911c7ea89b80d5bfc727f402f37c9eefd57 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:58:23 -0700 Subject: x86/irq: remove leftover code from NUMA_MIGRATE_IRQ_DESC The original feature of migrating irq_desc dynamic was too fragile and was causing problems: it caused crashes on systems with lots of cards with MSI-X when user-space irq-balancer was enabled. We now have new patches that create irq_desc according to device numa node. This patch removes the leftover bits of the dynamic balancer. [ Impact: remove dead code ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F654AF.8000808@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885..e1b2543 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -274,16 +274,6 @@ config SPARSE_IRQ If you don't know what to do here, say N. -config NUMA_MIGRATE_IRQ_DESC - bool "Move irq desc when changing irq smp_affinity" - depends on SPARSE_IRQ && NUMA - depends on BROKEN - default n - ---help--- - This enables moving irq_desc to cpu/node that irq will use handled. - - If you don't know what to do here, say N. - config X86_MPPARSE bool "Enable MPS table" if ACPI default y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 9fe5d21..27b8ce0 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -195,7 +195,6 @@ CONFIG_HIGH_RES_TIMERS=y CONFIG_GENERIC_CLOCKEVENTS_BUILD=y CONFIG_SMP=y CONFIG_SPARSE_IRQ=y -# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set CONFIG_X86_FIND_SMP_CONFIG=y CONFIG_X86_MPPARSE=y # CONFIG_X86_ELAN is not set diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 30da617..9fbf0f7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -148,9 +148,6 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - u8 move_desc_pending : 1; -#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -254,8 +251,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) return 0; } -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - +/* for move_irq_desc */ static void init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) { @@ -356,19 +352,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) old_desc->chip_data = NULL; } } - -static void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ - struct irq_cfg *cfg = desc->chip_data; - - if (!cfg->move_in_progress) { - /* it means that domain is not changed */ - if (!cpumask_intersects(desc->affinity, mask)) - cfg->move_desc_pending = 1; - } -} -#endif +/* end for move_irq_desc */ #else static struct irq_cfg *irq_cfg(unsigned int irq) @@ -378,13 +362,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif -#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC -static inline void -set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) -{ -} -#endif - struct io_apic { unsigned int index; unsigned int unused[3]; @@ -592,9 +569,6 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return BAD_APICID; - /* check that before desc->addinity get updated */ - set_extra_move_desc(desc, mask); - cpumask_copy(desc->affinity, mask); return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); @@ -2393,8 +2367,6 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) if (assign_irq_vector(irq, cfg, mask)) return; - set_extra_move_desc(desc, mask); - dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); irte.vector = cfg->vector; @@ -2491,34 +2463,14 @@ static void irq_complete_move(struct irq_desc **descp) struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - if (likely(!cfg->move_desc_pending)) - return; - - /* domain has not changed, but affinity did */ - me = smp_processor_id(); - if (cpumask_test_cpu(me, desc->affinity)) { - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; - cfg->move_desc_pending = 0; - } -#endif + if (likely(!cfg->move_in_progress)) return; - } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - *descp = desc = move_irq_desc(desc, me); - /* get the new one */ - cfg = desc->chip_data; -#endif + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); - } } #else static inline void irq_complete_move(struct irq_desc **descp) {} diff --git a/include/linux/irq.h b/include/linux/irq.h index c4953cf..2a34cd6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,16 +212,6 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); -static inline struct irq_desc * -irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) -{ -#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC - return irq_to_desc(irq); -#else - return desc; -#endif -} - /* * Migration helpers for obsolete names, they will go away: */ diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 3394f8f..2f06527 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o -obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o +obj-$(CONFIG_SPARSE_IRQ) += numa_migrate.o obj-$(CONFIG_PM_SLEEP) += pm.o diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c687ba4..13c68e7 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); @@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) /* Start handling the irq */ if (desc->chip->ack) desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) { + if (desc->chip->eoi) desc->chip->eoi(irq); - desc = irq_remap_to_desc(irq, desc); - } } void @@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) { + if (desc->chip != &no_irq_chip) mask_ack_irq(desc, irq); - desc = irq_remap_to_desc(irq, desc); - } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 882c798..3e0cbc4 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -458,11 +458,8 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - /* get new one */ - desc = irq_remap_to_desc(irq, desc); - } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -473,10 +470,8 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) { + if (desc->chip->ack) desc->chip->ack(irq); - desc = irq_remap_to_desc(irq, desc); - } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 5760d72..ce72bc3 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -97,9 +97,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, /* free the old one */ free_one_irq_desc(old_desc, desc); - spin_unlock(&old_desc->lock); kfree(old_desc); - spin_lock(&desc->lock); return desc; -- cgit v0.10.2 From d5dedd4507d307eb3f35f21b6e16f336fdc0d82a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:59:21 -0700 Subject: irq: change ->set_affinity() to return status according to Ingo, change set_affinity() in irq_chip should return int, because that way we can handle failure cases in a much cleaner way, in the genirq layer. v2: fix two typos [ Impact: extend API ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: linux-arch@vger.kernel.org LKML-Reference: <49F654E9.4070809@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 9c9d1fd..5bd5259 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } } -static void +static int dp264_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); cpu_set_irq_affinity(irq, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); + + return 0; } -static void +static int clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&dp264_irq_lock); cpu_set_irq_affinity(irq - 16, *affinity); tsunami_update_irq_hw(cached_irq_mask); spin_unlock(&dp264_irq_lock); + + return 0; } static struct hw_interrupt_type dp264_irq_type = { diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 27f840a..8dd239e 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity) } -static void +static int titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { spin_lock(&titan_irq_lock); titan_cpu_set_irq_affinity(irq - 16, *affinity); titan_update_irq_hw(titan_cached_irq_mask); spin_unlock(&titan_irq_lock); + + return 0; } static void diff --git a/arch/arm/common/gic.c b/arch/arm/common/gic.c index c6884ba..90f6b7f 100644 --- a/arch/arm/common/gic.c +++ b/arch/arm/common/gic.c @@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) +static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) { void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3); unsigned int shift = (irq % 4) * 8; @@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val) val |= 1 << (cpu + shift); writel(val, reg); spin_unlock(&irq_controller_lock); + + return 0; } #endif diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c index df3925c..d70b445 100644 --- a/arch/cris/arch-v32/kernel/irq.c +++ b/arch/cris/arch-v32/kernel/irq.c @@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq) { } -void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) +int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest) { unsigned long flags; spin_lock_irqsave(&irq_lock, flags); irq_allocations[irq - FIRST_IRQ].mask = *dest; spin_unlock_irqrestore(&irq_lock, flags); + + return 0; } static struct irq_chip crisv32_irq_type = { diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c index cc0a318..acb5047 100644 --- a/arch/ia64/hp/sim/hpsim_irq.c +++ b/arch/ia64/hp/sim/hpsim_irq.c @@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq) { } -static void +static int hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b) { + return 0; } static struct hw_interrupt_type irq_type_hp_sim = { diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 166e0d8..f92cef4 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -329,7 +329,7 @@ unmask_irq (unsigned int irq) } -static void +static int iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) { #ifdef CONFIG_SMP @@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) cpu = cpumask_first_and(cpu_online_mask, mask); if (cpu >= nr_cpu_ids) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; dest = cpu_physical_id(cpu); if (!iosapic_intr_info[irq].count) - return; /* not an IOSAPIC interrupt */ + return -1; /* not an IOSAPIC interrupt */ set_irq_affinity_info(irq, dest, redir); @@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask) iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); } + #endif + return 0; } /* diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 2b15e23..0f8ade9 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -12,7 +12,7 @@ static struct irq_chip ia64_msi_chip; #ifdef CONFIG_SMP -static void ia64_set_msi_irq_affinity(unsigned int irq, +static int ia64_set_msi_irq_affinity(unsigned int irq, const cpumask_t *cpu_mask) { struct msi_msg msg; @@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, int cpu = first_cpu(*cpu_mask); if (!cpu_online(cpu)) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; read_msi_msg(irq, &msg); @@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, write_msi_msg(irq, &msg); cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); + + return 0; } #endif /* CONFIG_SMP */ @@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_DMAR #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; int cpu = cpumask_first(mask); if (!cpu_online(cpu)) - return; + return -1; if (irq_prepare_move(irq, cpu)) - return; + return -1; dmar_msi_read(irq, &msg); @@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dmar_msi_write(irq, &msg); cpumask_copy(irq_desc[irq].affinity, mask); + + return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 66fd705..764f26a 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -227,7 +227,7 @@ finish_up: return new_irq_info; } -static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) +static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct sn_irq_info *sn_irq_info, *sn_irq_info_safe; nasid_t nasid; @@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask) list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) (void)sn_retarget_vector(sn_irq_info, nasid, slice); + + return 0; } #ifdef CONFIG_SMP diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 81e4289..fbbfb97 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry) } #ifdef CONFIG_SMP -static void sn_set_msi_irq_affinity(unsigned int irq, +static int sn_set_msi_irq_affinity(unsigned int irq, const struct cpumask *cpu_mask) { struct msi_msg msg; @@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, cpu = cpumask_first(cpu_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) - return; + return -1; /* * Release XIO resources for the old MSI PCI address @@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq, new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice); sn_msi_info[irq].sn_irq_info = new_irq_info; if (new_irq_info == NULL) - return; + return -1; /* * Map the xio address into bus space @@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq, write_msi_msg(irq, &msg); cpumask_copy(irq_desc[irq].affinity, cpu_mask); + + return 0; } #endif /* CONFIG_SMP */ diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 1c19af8..d3a0c81 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq) } #ifdef CONFIG_SMP -static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) +static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest) { int cpu; int bit = irq - OCTEON_IRQ_WORKQ0; /* Bit 0-63 of EN0 */ @@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask */ cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2)); write_unlock(&octeon_irq_ciu0_rwlock); + + return 0; } #endif @@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq) } #ifdef CONFIG_SMP -static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) +static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest) { int cpu; int bit = irq - OCTEON_IRQ_WDOG0; /* Bit 0-63 of EN1 */ @@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask */ cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1)); write_unlock(&octeon_irq_ciu1_rwlock); + + return 0; } #endif diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h index 3214ade..4f1eed1 100644 --- a/arch/mips/include/asm/irq.h +++ b/arch/mips/include/asm/irq.h @@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq) #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF #include -extern void plat_set_irq_affinity(unsigned int irq, +extern int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity); extern void smtc_forward_irq(unsigned int irq); diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index 87deb8f..3f43c2e 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq) static DEFINE_SPINLOCK(gic_lock); -static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { cpumask_t tmp = CPU_MASK_NONE; unsigned long flags; @@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_and(&tmp, cpumask, cpu_online_mask); if (cpus_empty(tmp)) - return; + return -1; /* Assumption : cpumask refers to a single CPU */ spin_lock_irqsave(&gic_lock, flags); @@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) cpumask_copy(irq_desc[irq].affinity, cpumask); spin_unlock_irqrestore(&gic_lock, flags); + return 0; } #endif diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index 5ba3188..499ffe5 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = { */ -void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) +int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) { cpumask_t tmask; int cpu = 0; @@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) /* Do any generic SMTC IRQ affinity setup */ smtc_set_irq_affinity(irq, tmask); + + return 0; } #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */ diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index 352352b..4f256a1 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq); static void disable_bcm1480_irq(unsigned int irq); static void ack_bcm1480_irq(unsigned int irq); #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); +static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_PCI @@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) +static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on, k; u64 cur_ints; @@ -119,7 +119,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) if (cpumask_weight(mask) != 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); - return; + return -1; } i = cpumask_first(mask); @@ -155,6 +155,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask) } spin_unlock(&bcm1480_imr_lock); spin_unlock_irqrestore(&desc->lock, flags); + + return 0; } #endif diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index c08ff58..e389507 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq); static void disable_sb1250_irq(unsigned int irq); static void ack_sb1250_irq(unsigned int irq); #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); +static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask); #endif #ifdef CONFIG_SIBYTE_HAS_LDT @@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq) } #ifdef CONFIG_SMP -static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) +static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) { int i = 0, old_cpu, cpu, int_on; u64 cur_ints; @@ -114,7 +114,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) if (cpumask_weight(mask) > 1) { printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq); - return; + return -1; } /* Convert logical CPU to physical CPU */ @@ -146,6 +146,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask) } spin_unlock(&sb1250_imr_lock); spin_unlock_irqrestore(&desc->lock, flags); + + return 0; } #endif diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 4ea4229..8007f1e 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest) return cpu_dest; } -static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) +static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { int cpu_dest; cpu_dest = cpu_check_affinity(irq, dest); if (cpu_dest < 0) - return; + return -1; cpumask_copy(&irq_desc[irq].affinity, dest); + + return 0; } #endif diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 80b5134..be3581a 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq) lpar_xirr_info_set((0xff << 24) | irq); } -static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) +static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) { unsigned int irq; int status; @@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) irq = (unsigned int)irq_map[virq].hwirq; if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) - return; + return -1; status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); if (status) { printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } /* @@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) printk(KERN_WARNING "%s: No online cpus in the mask %s for irq %d\n", __func__, cpulist, virq); - return; + return -1; } status = rtas_call(ibm_set_xive, 3, 1, NULL, @@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) if (status) { printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n", __func__, irq, status); - return; + return -1; } + + return 0; } static struct irq_chip xics_pic_direct = { diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 21b9567..f4cbd15 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq) #endif /* CONFIG_SMP */ -void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) +int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) { struct mpic *mpic = mpic_from_irq(irq); unsigned int src = mpic_irq_to_hw(irq); @@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask) mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION), mpic_physmask(cpus_addr(tmp)[0])); } + + return 0; } static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type) diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 5deabe9..e5e78f9 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq) } } -static void sun4u_set_affinity(unsigned int virt_irq, +static int sun4u_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { sun4u_irq_enable(virt_irq); + + return 0; } /* Don't do anything. The desc->status check for IRQ_DISABLED in @@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq) ino, err); } -static void sun4v_set_affinity(unsigned int virt_irq, +static int sun4v_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { unsigned int ino = virt_irq_table[virt_irq].dev_ino; @@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq, if (err != HV_EOK) printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " "err(%d)\n", ino, cpuid, err); + + return 0; } static void sun4v_irq_disable(unsigned int virt_irq) @@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq) dev_handle, dev_ino, err); } -static void sun4v_virt_set_affinity(unsigned int virt_irq, +static int sun4v_virt_set_affinity(unsigned int virt_irq, const struct cpumask *mask) { unsigned long cpuid, dev_handle, dev_ino; @@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq, printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " "err(%d)\n", dev_handle, dev_ino, cpuid, err); + + return 0; } static void sun4v_virq_disable(unsigned int virt_irq) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9fbf0f7..5c7630b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -574,13 +574,14 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); } -static void +static int set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; unsigned int irq; + int ret = -1; irq = desc->irq; cfg = desc->chip_data; @@ -591,18 +592,21 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); + ret = 0; } spin_unlock_irqrestore(&ioapic_lock, flags); + + return ret; } -static void +static int set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc; desc = irq_to_desc(irq); - set_ioapic_affinity_irq_desc(desc, mask); + return set_ioapic_affinity_irq_desc(desc, mask); } #endif /* CONFIG_SMP */ @@ -2348,24 +2352,25 @@ static int ioapic_retrigger_irq(unsigned int irq) * Real vector that is used for interrupting cpu will be coming from * the interrupt-remapping table entry. */ -static void +static int migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; struct irte irte; unsigned int dest; unsigned int irq; + int ret = -1; if (!cpumask_intersects(mask, cpu_online_mask)) - return; + return ret; irq = desc->irq; if (get_irte(irq, &irte)) - return; + return ret; cfg = desc->chip_data; if (assign_irq_vector(irq, cfg, mask)) - return; + return ret; dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); @@ -2381,27 +2386,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) send_cleanup_vector(cfg); cpumask_copy(desc->affinity, mask); + + return 0; } /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { - migrate_ioapic_irq_desc(desc, mask); + return migrate_ioapic_irq_desc(desc, mask); } -static void set_ir_ioapic_affinity_irq(unsigned int irq, +static int set_ir_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); - set_ir_ioapic_affinity_irq_desc(desc, mask); + return set_ir_ioapic_affinity_irq_desc(desc, mask); } #else -static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) { + return 0; } #endif @@ -3318,7 +3326,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms } #ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3327,7 +3335,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3339,13 +3347,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg_desc(desc, &msg); + + return 0; } #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is * done in the process context using interrupt-remapping hardware. */ -static void +static int ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); @@ -3354,11 +3364,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) struct irte irte; if (get_irte(irq, &irte)) - return; + return -1; dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; irte.vector = cfg->vector; irte.dest_id = IRTE_DEST(dest); @@ -3375,6 +3385,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) */ if (cfg->move_in_progress) send_cleanup_vector(cfg); + + return 0; } #endif @@ -3528,7 +3540,7 @@ void arch_teardown_msi_irq(unsigned int irq) #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) #ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3537,7 +3549,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3549,6 +3561,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3582,7 +3596,7 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_HPET_TIMER #ifdef CONFIG_SMP -static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3591,7 +3605,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; @@ -3603,6 +3617,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); + + return 0; } #endif /* CONFIG_SMP */ @@ -3659,7 +3675,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) write_ht_irq_msg(irq, &msg); } -static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) { struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; @@ -3667,11 +3683,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) dest = set_desc_affinity(desc, mask); if (dest == BAD_APICID) - return; + return -1; cfg = desc->chip_data; target_ht_irq(irq, dest, cfg->vector); + + return 0; } #endif diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c index 73348c4..4a9cc92 100644 --- a/drivers/parisc/iosapic.c +++ b/drivers/parisc/iosapic.c @@ -702,7 +702,7 @@ static unsigned int iosapic_startup_irq(unsigned int irq) } #ifdef CONFIG_SMP -static void iosapic_set_affinity_irq(unsigned int irq, +static int iosapic_set_affinity_irq(unsigned int irq, const struct cpumask *dest) { struct vector_info *vi = iosapic_get_vector(irq); @@ -712,7 +712,7 @@ static void iosapic_set_affinity_irq(unsigned int irq, dest_cpu = cpu_check_affinity(irq, dest); if (dest_cpu < 0) - return; + return -1; cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu)); vi->txn_addr = txn_affinity_addr(irq, dest_cpu); @@ -724,6 +724,8 @@ static void iosapic_set_affinity_irq(unsigned int irq, iosapic_set_irt_data(vi, &dummy_d0, &d1); iosapic_wr_irt_entry(vi, d0, d1); spin_unlock_irqrestore(&iosapic_lock, flags); + + return 0; } #endif diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30963af..3338988 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -688,13 +688,13 @@ void rebind_evtchn_irq(int evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { struct evtchn_bind_vcpu bind_vcpu; int evtchn = evtchn_from_irq(irq); if (!VALID_EVTCHN(evtchn)) - return; + return -1; /* Send future instances of this interrupt to other vcpu. */ bind_vcpu.port = evtchn; @@ -707,13 +707,15 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) */ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); -} + return 0; +} -static void set_affinity_irq(unsigned irq, const struct cpumask *dest) +static int set_affinity_irq(unsigned irq, const struct cpumask *dest) { unsigned tcpu = cpumask_first(dest); - rebind_irq_to_cpu(irq, tcpu); + + return rebind_irq_to_cpu(irq, tcpu); } int resend_irq_on_evtchn(unsigned int irq) diff --git a/include/linux/irq.h b/include/linux/irq.h index 2a34cd6..8e4c18b 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -117,7 +117,7 @@ struct irq_chip { void (*eoi)(unsigned int irq); void (*end)(unsigned int irq); - void (*set_affinity)(unsigned int irq, + int (*set_affinity)(unsigned int irq, const struct cpumask *dest); int (*retrigger)(unsigned int irq); int (*set_type)(unsigned int irq, unsigned int flow_type); -- cgit v0.10.2 From 57b150cce8e004ddd36330490a68bfb59b7271e9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 17:59:53 -0700 Subject: irq: only update affinity if ->set_affinity() is sucessfull irq_set_affinity() and move_masked_irq() try to assign affinity before calling chip set_affinity(). Some archs are assigning it in ->set_affinity() again. We do something like: cpumask_cpy(desc->affinity, mask); desc->chip->set_affinity(mask); But in the failure path, affinity should not be touched - otherwise we'll end up with a different affinity mask despite the failure to migrate the IRQ. So try to update the afffinity only if set_affinity returns with 0. Also call irq_set_thread_affinity accordingly. v2: update after "irq, x86: Remove IRQ_DISABLED check in process context IRQ move" v3: according to Ingo, change set_affinity() in irq_chip should return int. v4: update comments by removing moving irq_desc code. [ Impact: fix /proc/irq/*/smp_affinity setting corner case bug ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F65509.60307@kernel.org> Signed-off-by: Ingo Molnar diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 01ce20e..de5f412 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -42,6 +42,9 @@ static inline void unregister_handler_proc(unsigned int irq, extern int irq_select_affinity_usr(unsigned int irq); +extern void +irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask); + /* * Debugging printout: */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2734eca..aaf5c9d 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -80,7 +80,7 @@ int irq_can_set_affinity(unsigned int irq) return 1; } -static void +void irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) { struct irqaction *action = desc->action; @@ -109,17 +109,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) spin_lock_irqsave(&desc->lock, flags); #ifdef CONFIG_GENERIC_PENDING_IRQ - if (desc->status & IRQ_MOVE_PCNTXT) - desc->chip->set_affinity(irq, cpumask); + if (desc->status & IRQ_MOVE_PCNTXT) { + if (!desc->chip->set_affinity(irq, cpumask)) { + cpumask_copy(desc->affinity, cpumask); + irq_set_thread_affinity(desc, cpumask); + } + } else { desc->status |= IRQ_MOVE_PENDING; cpumask_copy(desc->pending_mask, cpumask); } #else - cpumask_copy(desc->affinity, cpumask); - desc->chip->set_affinity(irq, cpumask); + if (!desc->chip->set_affinity(irq, cpumask)) { + cpumask_copy(desc->affinity, cpumask); + irq_set_thread_affinity(desc, cpumask); + } #endif - irq_set_thread_affinity(desc, cpumask); desc->status |= IRQ_AFFINITY_SET; spin_unlock_irqrestore(&desc->lock, flags); return 0; diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index e05ad9b..cfe767c 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -1,5 +1,8 @@ #include +#include + +#include "internals.h" void move_masked_irq(int irq) { @@ -39,11 +42,12 @@ void move_masked_irq(int irq) * masking the irqs. */ if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) { - cpumask_and(desc->affinity, - desc->pending_mask, cpu_online_mask); - desc->chip->set_affinity(irq, desc->affinity); - } + < nr_cpu_ids)) + if (!desc->chip->set_affinity(irq, desc->pending_mask)) { + cpumask_copy(desc->affinity, desc->pending_mask); + irq_set_thread_affinity(desc, desc->pending_mask); + } + cpumask_clear(desc->pending_mask); } -- cgit v0.10.2 From 85ac16d033370caf6f48d743c8dc8103700f5cc5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:00:38 -0700 Subject: x86/irq: change irq_desc_alloc() to take node instead of cpu This simplifies the node awareness of the code. All our allocators only deal with a NUMA node ID locality not with CPU ids anyway - so there's no need to maintain (and transform) a CPU id all across the IRq layer. v2: keep move_irq_desc related [ Impact: cleanup, prepare IRQ code to be NUMA-aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: Jeremy Fitzhardinge LKML-Reference: <49F65536.2020300@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5c7630b..560b887 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -129,12 +129,9 @@ struct irq_pin_list { struct irq_pin_list *next; }; -static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +static struct irq_pin_list *get_one_free_irq_2_pin(int node) { struct irq_pin_list *pin; - int node; - - node = cpu_to_node(cpu); pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); @@ -209,12 +206,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) return cfg; } -static struct irq_cfg *get_one_free_irq_cfg(int cpu) +static struct irq_cfg *get_one_free_irq_cfg(int node) { struct irq_cfg *cfg; - int node; - - node = cpu_to_node(cpu); cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { @@ -235,13 +229,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) return cfg; } -int arch_init_chip_data(struct irq_desc *desc, int cpu) +int arch_init_chip_data(struct irq_desc *desc, int node) { struct irq_cfg *cfg; cfg = desc->chip_data; if (!cfg) { - desc->chip_data = get_one_free_irq_cfg(cpu); + desc->chip_data = get_one_free_irq_cfg(node); if (!desc->chip_data) { printk(KERN_ERR "can not alloc irq_cfg\n"); BUG_ON(1); @@ -253,7 +247,7 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) /* for move_irq_desc */ static void -init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) { struct irq_pin_list *old_entry, *head, *tail, *entry; @@ -262,7 +256,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) if (!old_entry) return; - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) return; @@ -272,7 +266,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) tail = entry; old_entry = old_entry->next; while (old_entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { entry = head; while (entry) { @@ -312,12 +306,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) } void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { struct irq_cfg *cfg; struct irq_cfg *old_cfg; - cfg = get_one_free_irq_cfg(cpu); + cfg = get_one_free_irq_cfg(node); if (!cfg) return; @@ -328,7 +322,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); - init_copy_irq_2_pin(old_cfg, cfg, cpu); + init_copy_irq_2_pin(old_cfg, cfg, node); } static void free_irq_cfg(struct irq_cfg *old_cfg) @@ -615,13 +609,13 @@ set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) +static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) { struct irq_pin_list *entry; entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(cpu); + entry = get_one_free_irq_2_pin(node); if (!entry) { printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", apic, pin); @@ -641,7 +635,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(cpu); + entry->next = get_one_free_irq_2_pin(node); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -650,7 +644,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, +static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, int oldapic, int oldpin, int newapic, int newpin) { @@ -670,7 +664,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); + add_pin_to_irq_node(cfg, node, newapic, newpin); } static inline void io_apic_modify_irq(struct irq_cfg *cfg, @@ -1612,7 +1606,7 @@ static void __init setup_IO_APIC_irqs(void) int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1647,13 +1641,13 @@ static void __init setup_IO_APIC_irqs(void) apic->multi_timer_check(apic_id, irq)) continue; - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); continue; } cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); + add_pin_to_irq_node(cfg, node, apic_id, pin); setup_IO_APIC_irq(apic_id, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); @@ -2863,7 +2857,7 @@ static inline void __init check_timer(void) { struct irq_desc *desc = irq_to_desc(0); struct irq_cfg *cfg = desc->chip_data; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); int apic1, pin1, apic2, pin2; unsigned long flags; int no_pin1 = 0; @@ -2929,7 +2923,7 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); + add_pin_to_irq_node(cfg, node, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } else { /* for edge trigger, setup_IO_APIC_irq already @@ -2966,7 +2960,7 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); + replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); enable_8259A_irq(0); if (timer_irq_works()) { @@ -3185,7 +3179,7 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned int new; unsigned long flags; struct irq_cfg *cfg_new = NULL; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); struct irq_desc *desc_new = NULL; irq = 0; @@ -3194,7 +3188,7 @@ unsigned int create_irq_nr(unsigned int irq_want) spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { - desc_new = irq_to_desc_alloc_cpu(new, cpu); + desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { printk(KERN_INFO "can not get irq_desc for %d\n", new); continue; @@ -3968,7 +3962,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p { struct irq_desc *desc; struct irq_cfg *cfg; - int cpu = boot_cpu_id; + int node = cpu_to_node(boot_cpu_id); if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", @@ -3976,7 +3970,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p return -EINVAL; } - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc %d\n", irq); return 0; @@ -3987,7 +3981,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p */ if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; - add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); + add_pin_to_irq_node(cfg, node, ioapic, pin); } setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index ca7ec44..45acbcf 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -636,7 +636,7 @@ static void __init lguest_init_IRQ(void) void lguest_setup_irq(unsigned int irq) { - irq_to_desc_alloc_cpu(irq, 0); + irq_to_desc_alloc_node(irq, 0); set_irq_chip_and_handler_name(irq, &lguest_irq_controller, handle_level_irq, "level"); } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f5e0ea7..9eff36a 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -23,15 +23,12 @@ struct irq_2_iommu { }; #ifdef CONFIG_GENERIC_HARDIRQS -static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) +static struct irq_2_iommu *get_one_free_irq_2_iommu(int node) { struct irq_2_iommu *iommu; - int node; - - node = cpu_to_node(cpu); iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); - printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node); + printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node); return iommu; } @@ -48,7 +45,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq) return desc->irq_2_iommu; } -static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) +static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; struct irq_2_iommu *irq_iommu; @@ -56,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) /* * alloc irq desc if not allocated already. */ - desc = irq_to_desc_alloc_cpu(irq, cpu); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc for %d\n", irq); return NULL; @@ -65,14 +62,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) irq_iommu = desc->irq_2_iommu; if (!irq_iommu) - desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu); + desc->irq_2_iommu = get_one_free_irq_2_iommu(node); return desc->irq_2_iommu; } static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) { - return irq_2_iommu_alloc_cpu(irq, boot_cpu_id); + return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id)); } #else /* !CONFIG_SPARSE_IRQ */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 3338988..be437c2 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -335,7 +335,7 @@ static int find_unbound_irq(void) if (irq == nr_irqs) panic("No available IRQ to bind to: increase nr_irqs!\n"); - desc = irq_to_desc_alloc_cpu(irq, 0); + desc = irq_to_desc_alloc_node(irq, 0); if (WARN_ON(desc == NULL)) return -1; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 91bb76f..ff374ce 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -566,6 +566,6 @@ struct irq_desc; extern int early_irq_init(void); extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); -extern int arch_init_chip_data(struct irq_desc *desc, int cpu); +extern int arch_init_chip_data(struct irq_desc *desc, int node); #endif diff --git a/include/linux/irq.h b/include/linux/irq.h index 8e4c18b..a09baf8 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -187,7 +187,7 @@ struct irq_desc { spinlock_t lock; #ifdef CONFIG_SMP cpumask_var_t affinity; - unsigned int cpu; + unsigned int node; #ifdef CONFIG_GENERIC_PENDING_IRQ cpumask_var_t pending_mask; #endif @@ -201,16 +201,16 @@ struct irq_desc { } ____cacheline_internodealigned_in_smp; extern void arch_init_copy_chip_data(struct irq_desc *old_desc, - struct irq_desc *desc, int cpu); + struct irq_desc *desc, int node); extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; #else /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node); #endif /* CONFIG_SPARSE_IRQ */ -extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); /* * Migration helpers for obsolete names, they will go away: @@ -422,12 +422,10 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { #ifdef CONFIG_CPUMASK_OFFSTACK - int node; - if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); @@ -437,8 +435,6 @@ static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, return true; } - node = cpu_to_node(cpu); - if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; @@ -494,7 +490,7 @@ static inline void free_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool alloc_desc_masks(struct irq_desc *desc, int cpu, +static inline bool alloc_desc_masks(struct irq_desc *desc, int node, bool boot) { return true; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 3e0cbc4..a6368db 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -81,12 +81,10 @@ static struct irq_desc irq_desc_init = { .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), }; -void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +void init_kstat_irqs(struct irq_desc *desc, int node, int nr) { - int node; void *ptr; - node = cpu_to_node(cpu); ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node); /* @@ -94,33 +92,32 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) * init_copy_kstat_irqs() could still use old one */ if (ptr) { - printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", - cpu, node); + printk(KERN_DEBUG " alloc kstat_irqs on node %d\n", node); desc->kstat_irqs = ptr; } } -static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) +static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) { memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); spin_lock_init(&desc->lock); desc->irq = irq; #ifdef CONFIG_SMP - desc->cpu = cpu; + desc->node = node; #endif lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_kstat_irqs(desc, cpu, nr_cpu_ids); + init_kstat_irqs(desc, node, nr_cpu_ids); if (!desc->kstat_irqs) { printk(KERN_ERR "can not alloc kstat_irqs\n"); BUG_ON(1); } - if (!alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, node, false)) { printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); BUG_ON(1); } init_desc_masks(desc); - arch_init_chip_data(desc, cpu); + arch_init_chip_data(desc, node); } /* @@ -189,11 +186,10 @@ struct irq_desc *irq_to_desc(unsigned int irq) return NULL; } -struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; unsigned long flags; - int node; if (irq >= nr_irqs) { WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", @@ -212,15 +208,13 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) if (desc) goto out_unlock; - node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); - printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", - irq, cpu, node); + printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node); if (!desc) { printk(KERN_ERR "can not alloc irq_desc\n"); BUG_ON(1); } - init_one_irq_desc(irq, desc, cpu); + init_one_irq_desc(irq, desc, node); irq_desc_ptrs[irq] = desc; @@ -270,7 +264,7 @@ struct irq_desc *irq_to_desc(unsigned int irq) return (irq < NR_IRQS) ? irq_desc + irq : NULL; } -struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node) { return irq_to_desc(irq); } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index de5f412..7346825 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); extern struct lock_class_key irq_desc_lock_class; -extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); +extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); extern void clear_kstat_irqs(struct irq_desc *desc); extern spinlock_t sparse_irq_lock; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index ce72bc3..2f69bee 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -15,9 +15,9 @@ static void init_copy_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc, - int cpu, int nr) + int node, int nr) { - init_kstat_irqs(desc, cpu, nr); + init_kstat_irqs(desc, node, nr); if (desc->kstat_irqs != old_desc->kstat_irqs) memcpy(desc->kstat_irqs, old_desc->kstat_irqs, @@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) } static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, - struct irq_desc *desc, int cpu) + struct irq_desc *desc, int node) { memcpy(desc, old_desc, sizeof(struct irq_desc)); - if (!alloc_desc_masks(desc, cpu, false)) { + if (!alloc_desc_masks(desc, node, false)) { printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " "for migration.\n", irq); return false; } spin_lock_init(&desc->lock); - desc->cpu = cpu; + desc->node = node; lockdep_set_class(&desc->lock, &irq_desc_lock_class); - init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids); init_copy_desc_masks(old_desc, desc); - arch_init_copy_chip_data(old_desc, desc, cpu); + arch_init_copy_chip_data(old_desc, desc, node); return true; } @@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) } static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, - int cpu) + int node) { struct irq_desc *desc; unsigned int irq; unsigned long flags; - int node; irq = old_desc->irq; @@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, if (desc && old_desc != desc) goto out_unlock; - node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); if (!desc) { printk(KERN_ERR "irq %d: can not get new irq_desc " @@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = old_desc; goto out_unlock; } - if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { + if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) { /* still use old one */ kfree(desc); desc = old_desc; @@ -107,24 +105,14 @@ out_unlock: return desc; } -struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) { - int old_cpu; - int node, old_node; - /* those all static, do move them */ if (desc->irq < NR_IRQS_LEGACY) return desc; - old_cpu = desc->cpu; - if (old_cpu != cpu) { - node = cpu_to_node(cpu); - old_node = cpu_to_node(old_cpu); - if (old_node != node) - desc = __real_move_irq_desc(desc, cpu); - else - desc->cpu = cpu; - } + if (desc->node != node) + desc = __real_move_irq_desc(desc, node); return desc; } diff --git a/kernel/softirq.c b/kernel/softirq.c index b525dd3..f674f33 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -828,7 +828,7 @@ int __init __weak arch_early_irq_init(void) return 0; } -int __weak arch_init_chip_data(struct irq_desc *desc, int cpu) +int __weak arch_init_chip_data(struct irq_desc *desc, int node) { return 0; } -- cgit v0.10.2 From a2f809b08ae4dddc1015c7dcd8659e5729e45b3e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:01:20 -0700 Subject: irq: change ACPI GSI APIs to also take a device argument We want to use dev_to_node() later on, to be aware of the 'home node' of the GSI in question. [ Impact: cleanup, prepare the IRQ code to be more NUMA aware ] Signed-off-by: Yinghai Lu Acked-by: Len Brown Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell Cc: Len Brown Cc: Bjorn Helgaas Cc: Tony Luck Cc: linux-acpi@vger.kernel.org Cc: linux-ia64@vger.kernel.org LKML-Reference: <49F65560.20904@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 5510317..baec6f0 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) +int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) { if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) return gsi; @@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table) fadt = (struct acpi_table_fadt *)fadt_header; - acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); + acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, + ACPI_ACTIVE_LOW); return 0; } diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9d826e4..07f2913 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -154,8 +154,8 @@ extern int timer_through_8259; extern int io_apic_get_unique_id(int ioapic, int apic_id); extern int io_apic_get_version(int ioapic); extern int io_apic_get_redir_entries(int ioapic); -extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, - int edge_level, int active_high_low); +extern int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, + int irq, int edge_level, int active_high_low); #endif /* CONFIG_ACPI */ extern int (*ioapic_renumber_irq)(int ioapic, int irq); diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 642fc7f..3ea1f53 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -72,7 +72,9 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base); extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs(void); -extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); +struct device; +extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, + int active_high_low); extern int acpi_probe_gsi(void); #ifdef CONFIG_X86_IO_APIC extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 723989d..6ee96b5 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -522,7 +522,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) * success: return IRQ number (>=0) * failure: return < 0 */ -int acpi_register_gsi(u32 gsi, int triggering, int polarity) +int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) { unsigned int irq; unsigned int plat_gsi = gsi; @@ -539,7 +539,7 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) #ifdef CONFIG_X86_IO_APIC if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { - plat_gsi = mp_register_gsi(gsi, triggering, polarity); + plat_gsi = mp_register_gsi(dev, gsi, triggering, polarity); } #endif acpi_gsi_to_irq(plat_gsi, &irq); @@ -1158,7 +1158,7 @@ void __init mp_config_acpi_legacy_irqs(void) } } -int mp_register_gsi(u32 gsi, int triggering, int polarity) +int mp_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) { int ioapic; int ioapic_pin; @@ -1253,7 +1253,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) } } #endif - io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + io_apic_set_pci_routing(dev, ioapic, ioapic_pin, gsi, triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, polarity == ACPI_ACTIVE_HIGH ? 0 : 1); return gsi; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 560b887..d934662 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3958,7 +3958,8 @@ int __init io_apic_get_version(int ioapic) } #endif -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) +int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq, + int triggering, int polarity) { struct irq_desc *desc; struct irq_cfg *cfg; diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 51b9f82..2faa9e2 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -401,7 +401,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev) /* Interrupt Line values above 0xF are forbidden */ if (dev->irq > 0 && (dev->irq <= 0xF)) { printk(" - using IRQ %d\n", dev->irq); - acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE, + acpi_register_gsi(&dev->dev, dev->irq, + ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); return 0; } else { @@ -410,7 +411,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) } } - rc = acpi_register_gsi(gsi, triggering, polarity); + rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); if (rc < 0) { dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n", pin_name(pin)); diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 340ba4f..4a9f349 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -224,7 +224,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp) break; } - gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE, + gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); if (gsi > 0) break; @@ -939,7 +939,7 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data) irqp = &res->data.extended_irq; for (i = 0; i < irqp->interrupt_count; i++) { - irq = acpi_register_gsi(irqp->interrupts[i], + irq = acpi_register_gsi(NULL, irqp->interrupts[i], irqp->triggering, irqp->polarity); if (irq < 0) return AE_ERROR; diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index adf1785..7f207f3 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -123,7 +123,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev, } flags = irq_flags(triggering, polarity, shareable); - irq = acpi_register_gsi(gsi, triggering, polarity); + irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); if (irq >= 0) pcibios_penalize_isa_irq(irq, 1); else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88be890..51b4b0a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num; extern int sbf_port; extern unsigned long acpi_realmode_flags; -int acpi_register_gsi (u32 gsi, int triggering, int polarity); +int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC -- cgit v0.10.2 From 024154cfdd802654cb236a18c78b6e37351e2c49 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:01:50 -0700 Subject: irq: change io_apic_set_pci_routing() to use device parameter Make actual use of the device parameter passed down to io_apic_set_pci_routing() - to have the IRQ descriptor on the home node of the device. If no device has been passed down, we assume it's a platform device and use the boot node ID for the IRQ descriptor. [ Impact: optimization, make IO-APIC code more NUMA aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F6557E.3080101@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d934662..82376e0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3963,7 +3963,7 @@ int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq, { struct irq_desc *desc; struct irq_cfg *cfg; - int node = cpu_to_node(boot_cpu_id); + int node; if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", @@ -3971,6 +3971,11 @@ int io_apic_set_pci_routing(struct device *dev, int ioapic, int pin, int irq, return -EINVAL; } + if (dev) + node = dev_to_node(dev); + else + node = cpu_to_node(boot_cpu_id); + desc = irq_to_desc_alloc_node(irq, node); if (!desc) { printk(KERN_INFO "can not get irq_desc %d\n", irq); -- cgit v0.10.2 From d047f53a2ecce37e3bdf79eac5a326fbaadb3628 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:02:23 -0700 Subject: x86/irq: change MSI irq_desc to be more numa aware Try to get irq_desc on the home node in create_irq_nr(). v2: don't check if we can move it when sparse_irq is not used v3: use move_irq_des, if that node is not what we want [ Impact: optimization, make MSI IRQ descriptors more NUMA aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F6559F.7070005@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 82376e0..9cd4806 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3172,14 +3172,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY; /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int irq_want) +unsigned int create_irq_nr(unsigned int irq_want, int node) { /* Allocate an unused irq */ unsigned int irq; unsigned int new; unsigned long flags; struct irq_cfg *cfg_new = NULL; - int node = cpu_to_node(boot_cpu_id); struct irq_desc *desc_new = NULL; irq = 0; @@ -3197,6 +3196,13 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; + +#ifdef CONFIG_NUMA_IRQ_DESC + /* different node ?*/ + if (desc_new->node != node) + desc = move_irq_desc(desc, node); +#endif + if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; break; @@ -3214,11 +3220,12 @@ unsigned int create_irq_nr(unsigned int irq_want) int create_irq(void) { + int node = cpu_to_node(boot_cpu_id); unsigned int irq_want; int irq; irq_want = nr_irqs_gsi; - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) irq = -1; @@ -3476,15 +3483,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) unsigned int irq_want; struct intel_iommu *iommu = NULL; int index = 0; + int node; /* x86 doesn't support multiple MSI yet */ if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; + node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want); + irq = create_irq_nr(irq_want, node); if (irq == 0) return -1; irq_want = irq + 1; diff --git a/include/linux/irq.h b/include/linux/irq.h index a09baf8..4b95ddb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -376,7 +376,7 @@ extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); /* Handle dynamic irq creation and destruction */ -extern unsigned int create_irq_nr(unsigned int irq_want); +extern unsigned int create_irq_nr(unsigned int irq_want, int node); extern int create_irq(void); extern void destroy_irq(unsigned int irq); -- cgit v0.10.2 From 56b581ea9591b5767b1e0204c6a06c7d0c49396e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 27 Apr 2009 18:02:46 -0700 Subject: irq: make ht irq_desc more numa aware Try to get irq_desc on the same node as create_irq_nr(). [ Impact: optimization, make HT IRQs more NUMA-aware ] Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Suresh Siddha Cc: "Eric W. Biederman" Cc: Rusty Russell LKML-Reference: <49F655B6.8020109@kernel.org> Signed-off-by: Ingo Molnar diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index bf7d6ce..4e9dd0f 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c @@ -98,6 +98,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) int max_irq; int pos; int irq; + int node; pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ); if (!pos) @@ -125,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) cfg->msg.address_lo = 0xffffffff; cfg->msg.address_hi = 0xffffffff; - irq = create_irq(); + node = dev_to_node(&dev->dev); + irq = create_irq_nr(0, node); if (irq <= 0) { kfree(cfg); -- cgit v0.10.2 From 2a2ed0dfc9ec44a899c7d4672f73f2c045099118 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Apr 2009 13:01:26 +0200 Subject: ALSA: hda - Don't enable auto-mute but for speakers in patch_realtek.c Enable auto-muting in model=auto only for devices with HP and speakers. For devices with HP and line-outs, don't enable the auto-muting. Also, add a debug print to show the auto-mute feature. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3a63063..96475dc 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1099,13 +1099,16 @@ static void alc_init_auto_hp(struct hda_codec *codec) return; if (!spec->autocfg.speaker_pins[0]) { - if (spec->autocfg.line_out_pins[0]) + if (spec->autocfg.line_out_pins[0] && + spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) spec->autocfg.speaker_pins[0] = spec->autocfg.line_out_pins[0]; else return; } + snd_printdd("realtek: Enable HP auto-muting on NID 0x%x\n", + spec->autocfg.hp_pins[0]); snd_hda_codec_write_cache(codec, spec->autocfg.hp_pins[0], 0, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC880_HP_EVENT); -- cgit v0.10.2 From cb6605c1e4d2a2eaffdde433fbfe1567ca688458 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 28 Apr 2009 13:03:19 +0200 Subject: ALSA: hda - Fix a typo in patch_realtek.c again The commmit dfed0ef9b3ff9e37903920b6938ed33344ad0b3d was reverted accidentally by the merge of auto-detection fix patch. Fixed again now. Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 96475dc..3e7207b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1151,7 +1151,7 @@ static int alc_subsystem_id(struct hda_codec *codec, ass = snd_hda_codec_get_pincfg(codec, nid); snd_printd("realtek: No valid SSID, " "checking pincfg 0x%08x for NID 0x%x\n", - nid, ass); + ass, nid); if (!(ass & 1) && !(ass & 0x100000)) return 0; if ((ass >> 30) != 1) /* no physical connection */ -- cgit v0.10.2 From 51e95bed6263f6a2e558ca04fc4ff877f3b0cca3 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 24 Apr 2009 16:26:25 -0700 Subject: slub: add Documentation/ABI/testing/sysfs-kernel-slab Adds documentation for the slub ABI. This is placed in the `testing' directory since the meanings of these files are still subject to change as slub is developed. Acked-by: Christoph Lameter Cc: Randy Dunlap Signed-off-by: David Rientjes Signed-off-by: Pekka Enberg diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab new file mode 100644 index 0000000..6dcf75e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-slab @@ -0,0 +1,479 @@ +What: /sys/kernel/slab +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The /sys/kernel/slab directory contains a snapshot of the + internal state of the SLUB allocator for each cache. Certain + files may be modified to change the behavior of the cache (and + any cache it aliases, if any). +Users: kernel memory tuning tools + +What: /sys/kernel/slab/cache/aliases +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The aliases file is read-only and specifies how many caches + have merged into this cache. + +What: /sys/kernel/slab/cache/align +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The align file is read-only and specifies the cache's object + alignment in bytes. + +What: /sys/kernel/slab/cache/alloc_calls +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The alloc_calls file is read-only and lists the kernel code + locations from which allocations for this cache were performed. + The alloc_calls file only contains information if debugging is + enabled for that cache (see Documentation/vm/slub.txt). + +What: /sys/kernel/slab/cache/alloc_fastpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The alloc_fastpath file is read-only and specifies how many + objects have been allocated using the fast path. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_from_partial +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The alloc_from_partial file is read-only and specifies how + many times a cpu slab has been full and it has been refilled + by using a slab from the list of partially used slabs. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_refill +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The alloc_refill file is read-only and specifies how many + times the per-cpu freelist was empty but there were objects + available as the result of remote cpu frees. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_slab +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The alloc_slab file is read-only and specifies how many times + a new slab had to be allocated from the page allocator. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/alloc_slowpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The alloc_slowpath file is read-only and specifies how many + objects have been allocated using the slow path because of a + refill or allocation from a partial or new slab. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/cache_dma +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The cache_dma file is read-only and specifies whether objects + are from ZONE_DMA. + Available when CONFIG_ZONE_DMA is enabled. + +What: /sys/kernel/slab/cache/cpu_slabs +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The cpu_slabs file is read-only and displays how many cpu slabs + are active and their NUMA locality. + +What: /sys/kernel/slab/cache/cpuslab_flush +Date: April 2009 +KernelVersion: 2.6.31 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file cpuslab_flush is read-only and specifies how many + times a cache's cpu slabs have been flushed as the result of + destroying or shrinking a cache, a cpu going offline, or as + the result of forcing an allocation from a certain node. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/ctor +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The ctor file is read-only and specifies the cache's object + constructor function, which is invoked for each object when a + new slab is allocated. + +What: /sys/kernel/slab/cache/deactivate_empty +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file deactivate_empty is read-only and specifies how many + times an empty cpu slab was deactivated. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_full +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file deactivate_full is read-only and specifies how many + times a full cpu slab was deactivated. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_remote_frees +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file deactivate_remote_frees is read-only and specifies how + many times a cpu slab has been deactivated and contained free + objects that were freed remotely. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_to_head +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file deactivate_to_head is read-only and specifies how + many times a partial cpu slab was deactivated and added to the + head of its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/deactivate_to_tail +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file deactivate_to_tail is read-only and specifies how + many times a partial cpu slab was deactivated and added to the + tail of its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/destroy_by_rcu +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The destroy_by_rcu file is read-only and specifies whether + slabs (not objects) are freed by rcu. + +What: /sys/kernel/slab/cache/free_add_partial +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file free_add_partial is read-only and specifies how many + times an object has been freed in a full slab so that it had to + added to its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_calls +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The free_calls file is read-only and lists the locations of + object frees if slab debugging is enabled (see + Documentation/vm/slub.txt). + +What: /sys/kernel/slab/cache/free_fastpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The free_fastpath file is read-only and specifies how many + objects have been freed using the fast path because it was an + object from the cpu slab. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_frozen +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The free_frozen file is read-only and specifies how many + objects have been freed to a frozen slab (i.e. a remote cpu + slab). + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_remove_partial +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file free_remove_partial is read-only and specifies how + many times an object has been freed to a now-empty slab so + that it had to be removed from its node's partial list. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_slab +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The free_slab file is read-only and specifies how many times an + empty slab has been freed back to the page allocator. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/free_slowpath +Date: February 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The free_slowpath file is read-only and specifies how many + objects have been freed using the slow path (i.e. to a full or + partial slab). + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/hwcache_align +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The hwcache_align file is read-only and specifies whether + objects are aligned on cachelines. + +What: /sys/kernel/slab/cache/min_partial +Date: February 2009 +KernelVersion: 2.6.30 +Contact: Pekka Enberg , + David Rientjes +Description: + The min_partial file specifies how many empty slabs shall + remain on a node's partial list to avoid the overhead of + allocating new slabs. Such slabs may be reclaimed by utilizing + the shrink file. + +What: /sys/kernel/slab/cache/object_size +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The object_size file is read-only and specifies the cache's + object size. + +What: /sys/kernel/slab/cache/objects +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The objects file is read-only and displays how many objects are + active and from which nodes they are from. + +What: /sys/kernel/slab/cache/objects_partial +Date: April 2008 +KernelVersion: 2.6.26 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The objects_partial file is read-only and displays how many + objects are on partial slabs and from which nodes they are + from. + +What: /sys/kernel/slab/cache/objs_per_slab +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file objs_per_slab is read-only and specifies how many + objects may be allocated from a single slab of the order + specified in /sys/kernel/slab/cache/order. + +What: /sys/kernel/slab/cache/order +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The order file specifies the page order at which new slabs are + allocated. It is writable and can be changed to increase the + number of objects per slab. If a slab cannot be allocated + because of fragmentation, SLUB will retry with the minimum order + possible depending on its characteristics. + +What: /sys/kernel/slab/cache/order_fallback +Date: April 2008 +KernelVersion: 2.6.26 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file order_fallback is read-only and specifies how many + times an allocation of a new slab has not been possible at the + cache's order and instead fallen back to its minimum possible + order. + Available when CONFIG_SLUB_STATS is enabled. + +What: /sys/kernel/slab/cache/partial +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The partial file is read-only and displays how long many + partial slabs there are and how long each node's list is. + +What: /sys/kernel/slab/cache/poison +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The poison file specifies whether objects should be poisoned + when a new slab is allocated. + +What: /sys/kernel/slab/cache/reclaim_account +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The reclaim_account file specifies whether the cache's objects + are reclaimable (and grouped by their mobility). + +What: /sys/kernel/slab/cache/red_zone +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The red_zone file specifies whether the cache's objects are red + zoned. + +What: /sys/kernel/slab/cache/remote_node_defrag_ratio +Date: January 2008 +KernelVersion: 2.6.25 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The file remote_node_defrag_ratio specifies the percentage of + times SLUB will attempt to refill the cpu slab with a partial + slab from a remote node as opposed to allocating a new slab on + the local node. This reduces the amount of wasted memory over + the entire system but can be expensive. + Available when CONFIG_NUMA is enabled. + +What: /sys/kernel/slab/cache/sanity_checks +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The sanity_checks file specifies whether expensive checks + should be performed on free and, at minimum, enables double free + checks. Caches that enable sanity_checks cannot be merged with + caches that do not. + +What: /sys/kernel/slab/cache/shrink +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The shrink file is written when memory should be reclaimed from + a cache. Empty partial slabs are freed and the partial list is + sorted so the slabs with the fewest available objects are used + first. + +What: /sys/kernel/slab/cache/slab_size +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The slab_size file is read-only and specifies the object size + with metadata (debugging information and alignment) in bytes. + +What: /sys/kernel/slab/cache/slabs +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The slabs file is read-only and displays how long many slabs + there are (both cpu and partial) and from which nodes they are + from. + +What: /sys/kernel/slab/cache/store_user +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The store_user file specifies whether the location of + allocation or free should be tracked for a cache. + +What: /sys/kernel/slab/cache/total_objects +Date: April 2008 +KernelVersion: 2.6.26 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The total_objects file is read-only and displays how many total + objects a cache has and from which nodes they are from. + +What: /sys/kernel/slab/cache/trace +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + The trace file specifies whether object allocations and frees + should be traced. + +What: /sys/kernel/slab/cache/validate +Date: May 2007 +KernelVersion: 2.6.22 +Contact: Pekka Enberg , + Christoph Lameter +Description: + Writing to the validate file causes SLUB to traverse all of its + cache's objects and check the validity of metadata. -- cgit v0.10.2 From 4278600644dee621bd50d7498e244b135612e0f6 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 28 Apr 2009 23:12:10 +0900 Subject: sh: register the rtc-generic platform device properly. The device registration was accidentally omitted, add it back in. Do some basic device probing as well, so this doesn't show up for platforms that tie in to the RTC interface properly. Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/time_32.c b/arch/sh/kernel/time_32.c index a2458bf..4573321 100644 --- a/arch/sh/kernel/time_32.c +++ b/arch/sh/kernel/time_32.c @@ -3,7 +3,7 @@ * * Copyright (C) 1999 Tetsuya Okada & Niibe Yutaka * Copyright (C) 2000 Philipp Rumpf - * Copyright (C) 2002 - 2008 Paul Mundt + * Copyright (C) 2002 - 2009 Paul Mundt * Copyright (C) 2002 M. R. Brown * * Some code taken from i386 version. @@ -68,6 +68,21 @@ int set_rtc_time(struct rtc_time *tm) } EXPORT_SYMBOL(set_rtc_time); +static int __init rtc_generic_init(void) +{ + struct platform_device *pdev; + + if (rtc_sh_get_time == null_rtc_get_time) + return -ENODEV; + + pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + return 0; +} +module_init(rtc_generic_init); + #ifndef CONFIG_GENERIC_TIME void do_gettimeofday(struct timeval *tv) { -- cgit v0.10.2 From 1130b0296184bc21806225fd06d533515a99d2db Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 28 Apr 2009 14:56:18 +0200 Subject: perf_counter tools: fix Documentation/perf_counter build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mike Galbraith reported: > marge:..Documentation/perf_counter # make > CC builtin-stat.o > In file included from builtin-stat.c:71: > /usr/include/ctype.h:102: error: expected expression before ‘]’ token Remove the ctype.h include. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c index d7ace63..112b94e 100644 --- a/Documentation/perf_counter/builtin-stat.c +++ b/Documentation/perf_counter/builtin-stat.c @@ -68,7 +68,6 @@ #include #include #include -#include #include #include #include diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c index dea016f..6a276d2 100644 --- a/Documentation/perf_counter/builtin-top.c +++ b/Documentation/perf_counter/builtin-top.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 6b2db28a7a2da1064df9e179d9b6d07b0bfe156a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Apr 2009 10:54:49 +0900 Subject: fuse: misc cleanups * fuse_file_alloc() was structured in weird way. The success path was split between else block and code following the block. Restructure the code such that it's easier to read and modify. * Unindent success path of fuse_release_common() to ease future changes. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 06f30e9..028e17d 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -49,22 +49,26 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) { struct fuse_file *ff; + ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); - if (ff) { - ff->reserved_req = fuse_request_alloc(); - if (!ff->reserved_req) { - kfree(ff); - return NULL; - } else { - INIT_LIST_HEAD(&ff->write_entry); - atomic_set(&ff->count, 0); - spin_lock(&fc->lock); - ff->kh = ++fc->khctr; - spin_unlock(&fc->lock); - } - RB_CLEAR_NODE(&ff->polled_node); - init_waitqueue_head(&ff->poll_wait); + if (unlikely(!ff)) + return NULL; + + ff->reserved_req = fuse_request_alloc(); + if (unlikely(!ff->reserved_req)) { + kfree(ff); + return NULL; } + + INIT_LIST_HEAD(&ff->write_entry); + atomic_set(&ff->count, 0); + RB_CLEAR_NODE(&ff->polled_node); + init_waitqueue_head(&ff->poll_wait); + + spin_lock(&fc->lock); + ff->kh = ++fc->khctr; + spin_unlock(&fc->lock); + return ff; } @@ -158,34 +162,37 @@ void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) int fuse_release_common(struct inode *inode, struct file *file, int isdir) { - struct fuse_file *ff = file->private_data; - if (ff) { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = ff->reserved_req; + struct fuse_conn *fc; + struct fuse_file *ff; + struct fuse_req *req; - fuse_release_fill(ff, get_node_id(inode), file->f_flags, - isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); + ff = file->private_data; + if (unlikely(!ff)) + return 0; /* return value is ignored by VFS */ - /* Hold vfsmount and dentry until release is finished */ - req->misc.release.vfsmount = mntget(file->f_path.mnt); - req->misc.release.dentry = dget(file->f_path.dentry); + fc = get_fuse_conn(inode); + req = ff->reserved_req; - spin_lock(&fc->lock); - list_del(&ff->write_entry); - if (!RB_EMPTY_NODE(&ff->polled_node)) - rb_erase(&ff->polled_node, &fc->polled_files); - spin_unlock(&fc->lock); + fuse_release_fill(ff, get_node_id(inode), file->f_flags, + isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); - wake_up_interruptible_sync(&ff->poll_wait); - /* - * Normally this will send the RELEASE request, - * however if some asynchronous READ or WRITE requests - * are outstanding, the sending will be delayed - */ - fuse_file_put(ff); - } + /* Hold vfsmount and dentry until release is finished */ + req->misc.release.vfsmount = mntget(file->f_path.mnt); + req->misc.release.dentry = dget(file->f_path.dentry); + + spin_lock(&fc->lock); + list_del(&ff->write_entry); + if (!RB_EMPTY_NODE(&ff->polled_node)) + rb_erase(&ff->polled_node, &fc->polled_files); + spin_unlock(&fc->lock); - /* Return value is ignored by VFS */ + wake_up_interruptible_sync(&ff->poll_wait); + /* + * Normally this will send the RELEASE request, however if + * some asynchronous READ or WRITE requests are outstanding, + * the sending will be delayed. + */ + fuse_file_put(ff); return 0; } -- cgit v0.10.2 From b0be46ebf72ca7478c1c4bd0153c42f90e768a03 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:36 +0200 Subject: fuse: use struct path in release structure Use struct path instead of separate dentry and vfsmount in req->misc.release. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 028e17d..3c8fa93 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -86,15 +86,14 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) { - dput(req->misc.release.dentry); - mntput(req->misc.release.vfsmount); + path_put(&req->misc.release.path); } static void fuse_file_put(struct fuse_file *ff) { if (atomic_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - struct inode *inode = req->misc.release.dentry->d_inode; + struct inode *inode = req->misc.release.path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); req->end = fuse_release_end; fuse_request_send_background(fc, req); @@ -177,8 +176,8 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); /* Hold vfsmount and dentry until release is finished */ - req->misc.release.vfsmount = mntget(file->f_path.mnt); - req->misc.release.dentry = dget(file->f_path.dentry); + path_get(&file->f_path); + req->misc.release.path = file->f_path; spin_lock(&fc->lock); list_del(&ff->write_entry); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 6fc5aed..146317f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -248,8 +248,7 @@ struct fuse_req { struct fuse_forget_in forget_in; struct { struct fuse_release_in in; - struct vfsmount *vfsmount; - struct dentry *dentry; + struct path path; } release; struct fuse_init_in init_in; struct fuse_init_out init_out; -- cgit v0.10.2 From 2d698b070382442f817813b9dd0103034e5bca81 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:36 +0200 Subject: fuse: clean up fuse_write_fill() Move out code from fuse_write_fill() which is not common to all callers. Remove two function arguments which become unnecessary. Also remove unnecessary memset(), the request is already initialized to zero. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3c8fa93..823f84a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -613,20 +613,16 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, return generic_file_aio_read(iocb, iov, nr_segs, pos); } -static void fuse_write_fill(struct fuse_req *req, struct file *file, - struct fuse_file *ff, struct inode *inode, - loff_t pos, size_t count, int writepage) +static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, + struct inode *inode, loff_t pos, size_t count) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_out *outarg = &req->misc.write.out; - memset(inarg, 0, sizeof(struct fuse_write_in)); inarg->fh = ff->fh; inarg->offset = pos; inarg->size = count; - inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0; - inarg->flags = file ? file->f_flags : 0; req->in.h.opcode = FUSE_WRITE; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 2; @@ -646,9 +642,11 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, fl_owner_t owner) { struct fuse_conn *fc = get_fuse_conn(inode); - fuse_write_fill(req, file, file->private_data, inode, pos, count, 0); + struct fuse_write_in *inarg = &req->misc.write.in; + + fuse_write_fill(req, file->private_data, inode, pos, count); + inarg->flags = file->f_flags; if (owner != NULL) { - struct fuse_write_in *inarg = &req->misc.write.in; inarg->write_flags |= FUSE_WRITE_LOCKOWNER; inarg->lock_owner = fuse_lock_owner_id(fc, owner); } @@ -1183,9 +1181,10 @@ static int fuse_writepage_locked(struct page *page) req->ff = fuse_file_get(ff); spin_unlock(&fc->lock); - fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1); + fuse_write_fill(req, ff, inode, page_offset(page), 0); copy_highpage(tmp_page, page); + req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; req->in.argpages = 1; req->num_pages = 1; req->pages[0] = tmp_page; -- cgit v0.10.2 From d09cb9d7f6e4cb1dd0cf12ee0d352440291c74cf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:36 +0200 Subject: fuse: prepare fuse_direct_io() for CUSE Move code operating on the inode out from fuse_direct_io(). This prepares this function for use by CUSE, where the inode is not owned by a fuse filesystem. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 823f84a..b83d7d8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -993,9 +993,6 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, ssize_t res = 0; struct fuse_req *req; - if (is_bad_inode(inode)) - return -EIO; - req = fuse_get_req(fc); if (IS_ERR(req)) return PTR_ERR(req); @@ -1038,12 +1035,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } } fuse_put_request(fc, req); - if (res > 0) { - if (write) - fuse_write_update_size(inode, pos); + if (res > 0) *ppos = pos; - } - fuse_invalidate_attr(inode); return res; } @@ -1051,7 +1044,17 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - return fuse_direct_io(file, buf, count, ppos, 0); + ssize_t res; + struct inode *inode = file->f_path.dentry->d_inode; + + if (is_bad_inode(inode)) + return -EIO; + + res = fuse_direct_io(file, buf, count, ppos, 0); + + fuse_invalidate_attr(inode); + + return res; } static ssize_t fuse_direct_write(struct file *file, const char __user *buf, @@ -1059,12 +1062,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, { struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; + + if (is_bad_inode(inode)) + return -EIO; + /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); res = generic_write_checks(file, ppos, &count, 0); - if (!res) + if (!res) { res = fuse_direct_io(file, buf, count, ppos, 1); + if (res > 0) + fuse_write_update_size(inode, *ppos); + } mutex_unlock(&inode->i_mutex); + + fuse_invalidate_attr(inode); + return res; } -- cgit v0.10.2 From da5e4714578ff323f7a61af490fc3539e68f188b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:36 +0200 Subject: fuse: add members to struct fuse_file Add new members ->fc and ->nodeid to struct fuse_file. This will aid in converting functions for use by CUSE, where the inode is not owned by a fuse filesystem. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b83d7d8..3be0301 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -54,6 +54,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc) if (unlikely(!ff)) return NULL; + ff->fc = fc; ff->reserved_req = fuse_request_alloc(); if (unlikely(!ff->reserved_req)) { kfree(ff); @@ -111,6 +112,7 @@ void fuse_finish_open(struct inode *inode, struct file *file, if (outarg->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); ff->fh = outarg->fh; + ff->nodeid = get_node_id(inode); file->private_data = fuse_file_get(ff); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 146317f..4469d99 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -97,8 +97,13 @@ struct fuse_inode { struct list_head writepages; }; +struct fuse_conn; + /** FUSE specific file data */ struct fuse_file { + /** Fuse connection for this file */ + struct fuse_conn *fc; + /** Request reserved for flush and release */ struct fuse_req *reserved_req; @@ -108,6 +113,9 @@ struct fuse_file { /** File handle used by userspace */ u64 fh; + /** Node id of this file */ + u64 nodeid; + /** Refcount */ atomic_t count; @@ -185,8 +193,6 @@ enum fuse_req_state { FUSE_REQ_FINISHED }; -struct fuse_conn; - /** * A request to the client */ -- cgit v0.10.2 From 2106cb18930312af9325d3418e138569c5b903cc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:37 +0200 Subject: fuse: don't use inode in helpers called by fuse_direct_io() Use ff->fc and ff->nodeid instead of passing down the inode. This prepares this function for use by CUSE, where the inode is not owned by a fuse filesystem. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8b8eebc..222584b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1035,7 +1035,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3be0301..aa4a387 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -379,8 +379,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync) return fuse_fsync_common(file, de, datasync, 0); } -void fuse_read_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, int opcode) +void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, + size_t count, int opcode) { struct fuse_read_in *inarg = &req->misc.read.in; struct fuse_file *ff = file->private_data; @@ -390,7 +390,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, inarg->size = count; inarg->flags = file->f_flags; req->in.h.opcode = opcode; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_read_in); req->in.args[0].value = inarg; @@ -400,12 +400,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, } static size_t fuse_send_read(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - fl_owner_t owner) + loff_t pos, size_t count, fl_owner_t owner) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; - fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + fuse_read_fill(req, file, pos, count, FUSE_READ); if (owner != NULL) { struct fuse_read_in *inarg = &req->misc.read.in; @@ -463,7 +463,7 @@ static int fuse_readpage(struct file *file, struct page *page) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - num_read = fuse_send_read(req, file, inode, pos, count, NULL); + num_read = fuse_send_read(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); @@ -512,19 +512,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) fuse_file_put(req->ff); } -static void fuse_send_readpages(struct fuse_req *req, struct file *file, - struct inode *inode) +static void fuse_send_readpages(struct fuse_req *req, struct file *file) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; loff_t pos = page_offset(req->pages[0]); size_t count = req->num_pages << PAGE_CACHE_SHIFT; req->out.argpages = 1; req->out.page_zeroing = 1; - fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + fuse_read_fill(req, file, pos, count, FUSE_READ); req->misc.read.attr_ver = fuse_get_attr_version(fc); if (fc->async_read) { - struct fuse_file *ff = file->private_data; req->ff = fuse_file_get(ff); req->end = fuse_readpages_end; fuse_request_send_background(fc, req); @@ -554,7 +553,7 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { - fuse_send_readpages(req, data->file, inode); + fuse_send_readpages(req, data->file); data->req = req = fuse_get_req(fc); if (IS_ERR(req)) { unlock_page(page); @@ -588,7 +587,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); if (!err) { if (data.req->num_pages) - fuse_send_readpages(data.req, file, inode); + fuse_send_readpages(data.req, file); else fuse_put_request(fc, data.req); } @@ -616,9 +615,8 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, } static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, - struct inode *inode, loff_t pos, size_t count) + loff_t pos, size_t count) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_write_in *inarg = &req->misc.write.in; struct fuse_write_out *outarg = &req->misc.write.out; @@ -626,9 +624,9 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, inarg->offset = pos; inarg->size = count; req->in.h.opcode = FUSE_WRITE; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 2; - if (fc->minor < 9) + if (ff->fc->minor < 9) req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; else req->in.args[0].size = sizeof(struct fuse_write_in); @@ -640,13 +638,13 @@ static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, } static size_t fuse_send_write(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - fl_owner_t owner) + loff_t pos, size_t count, fl_owner_t owner) { - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; struct fuse_write_in *inarg = &req->misc.write.in; - fuse_write_fill(req, file->private_data, inode, pos, count); + fuse_write_fill(req, ff, pos, count); inarg->flags = file->f_flags; if (owner != NULL) { inarg->write_flags |= FUSE_WRITE_LOCKOWNER; @@ -706,7 +704,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode, req->num_pages = 1; req->pages[0] = page; req->page_offset = offset; - nres = fuse_send_write(req, file, inode, pos, count, NULL); + nres = fuse_send_write(req, file, pos, count, NULL); err = req->out.h.error; fuse_put_request(fc, req); if (!err && !nres) @@ -747,7 +745,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, for (i = 0; i < req->num_pages; i++) fuse_wait_on_page_writeback(inode, req->pages[i]->index); - res = fuse_send_write(req, file, inode, pos, count, NULL); + res = fuse_send_write(req, file, pos, count, NULL); offset = req->page_offset; count = res; @@ -988,8 +986,8 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, static ssize_t fuse_direct_io(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int write) { - struct inode *inode = file->f_path.dentry->d_inode; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; ssize_t res = 0; @@ -1001,6 +999,7 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, while (count) { size_t nres; + fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); int err = fuse_get_user_pages(req, buf, &nbytes, write); if (err) { @@ -1009,11 +1008,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, } if (write) - nres = fuse_send_write(req, file, inode, pos, nbytes, - current->files); + nres = fuse_send_write(req, file, pos, nbytes, owner); else - nres = fuse_send_read(req, file, inode, pos, nbytes, - current->files); + nres = fuse_send_read(req, file, pos, nbytes, owner); + fuse_release_user_pages(req, !write); if (req->out.h.error) { if (!res) @@ -1196,7 +1194,7 @@ static int fuse_writepage_locked(struct page *page) req->ff = fuse_file_get(ff); spin_unlock(&fc->lock); - fuse_write_fill(req, ff, inode, page_offset(page), 0); + fuse_write_fill(req, ff, page_offset(page), 0); copy_highpage(tmp_page, page); req->misc.write.in.write_flags |= FUSE_WRITE_CACHE; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 4469d99..a51f63c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -520,7 +520,7 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, * Initialize READ or READDIR request */ void fuse_read_fill(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, int opcode); + loff_t pos, size_t count, int opcode); /** * Send OPEN or OPENDIR request -- cgit v0.10.2 From c7b7143c6342b8751d47b03a025ac5c0ac1ae809 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:37 +0200 Subject: fuse: clean up args in fuse_finish_open() and fuse_release_fill() Move setting ff->fh, ff->nodeid and file->private_data outside fuse_finish_open(). Add ->open_flags member to struct fuse_file. This simplifies the argument passing to fuse_finish_open() and fuse_release_fill(), and paves the way for creating an open helper that doesn't need an inode pointer. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 222584b..da87a3d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -365,9 +365,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * Synchronous release for the case when something goes wrong in CREATE_OPEN */ static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, - u64 nodeid, int flags) + int flags) { - fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); + fuse_release_fill(ff, flags, FUSE_RELEASE); ff->reserved_req->force = 1; fuse_request_send(fc, ff->reserved_req); fuse_put_request(fc, ff->reserved_req); @@ -445,12 +445,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, goto out_free_ff; fuse_put_request(fc, req); + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); - ff->fh = outopen.fh; - fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_sync_release(fc, ff, flags); fuse_send_forget(fc, forget_req, outentry.nodeid, 1); return -ENOMEM; } @@ -460,11 +462,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_invalidate_attr(dir); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { - ff->fh = outopen.fh; - fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_sync_release(fc, ff, flags); return PTR_ERR(file); } - fuse_finish_open(inode, file, ff, &outopen); + file->private_data = fuse_file_get(ff); + fuse_finish_open(inode, file); return 0; out_free_ff: diff --git a/fs/fuse/file.c b/fs/fuse/file.c index aa4a387..d9458ca 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -79,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff) kfree(ff); } -static struct fuse_file *fuse_file_get(struct fuse_file *ff) +struct fuse_file *fuse_file_get(struct fuse_file *ff) { atomic_inc(&ff->count); return ff; @@ -102,18 +102,16 @@ static void fuse_file_put(struct fuse_file *ff) } } -void fuse_finish_open(struct inode *inode, struct file *file, - struct fuse_file *ff, struct fuse_open_out *outarg) +void fuse_finish_open(struct inode *inode, struct file *file) { - if (outarg->open_flags & FOPEN_DIRECT_IO) + struct fuse_file *ff = file->private_data; + + if (ff->open_flags & FOPEN_DIRECT_IO) file->f_op = &fuse_direct_io_file_operations; - if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) + if (!(ff->open_flags & FOPEN_KEEP_CACHE)) invalidate_inode_pages2(inode->i_mapping); - if (outarg->open_flags & FOPEN_NONSEEKABLE) + if (ff->open_flags & FOPEN_NONSEEKABLE) nonseekable_open(inode, file); - ff->fh = outarg->fh; - ff->nodeid = get_node_id(inode); - file->private_data = fuse_file_get(ff); } int fuse_open_common(struct inode *inode, struct file *file, int isdir) @@ -141,13 +139,17 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) else { if (isdir) outarg.open_flags &= ~FOPEN_DIRECT_IO; - fuse_finish_open(inode, file, ff, &outarg); + ff->fh = outarg.fh; + ff->nodeid = get_node_id(inode); + ff->open_flags = outarg.open_flags; + file->private_data = fuse_file_get(ff); + fuse_finish_open(inode, file); } return err; } -void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) +void fuse_release_fill(struct fuse_file *ff, int flags, int opcode) { struct fuse_req *req = ff->reserved_req; struct fuse_release_in *inarg = &req->misc.release.in; @@ -155,7 +157,7 @@ void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode) inarg->fh = ff->fh; inarg->flags = flags; req->in.h.opcode = opcode; - req->in.h.nodeid = nodeid; + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_release_in); req->in.args[0].value = inarg; @@ -174,7 +176,7 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) fc = get_fuse_conn(inode); req = ff->reserved_req; - fuse_release_fill(ff, get_node_id(inode), file->f_flags, + fuse_release_fill(ff, file->f_flags, isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); /* Hold vfsmount and dentry until release is finished */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a51f63c..ce46c12 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -119,6 +119,9 @@ struct fuse_file { /** Refcount */ atomic_t count; + /** FOPEN_* flags returned by open */ + u32 open_flags; + /** Entry on inode's write_files list */ struct list_head write_entry; @@ -528,12 +531,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, int fuse_open_common(struct inode *inode, struct file *file, int isdir); struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); +struct fuse_file *fuse_file_get(struct fuse_file *ff); void fuse_file_free(struct fuse_file *ff); -void fuse_finish_open(struct inode *inode, struct file *file, - struct fuse_file *ff, struct fuse_open_out *outarg); +void fuse_finish_open(struct inode *inode, struct file *file); /** Fill in ff->reserved_req with a RELEASE request */ -void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode); +void fuse_release_fill(struct fuse_file *ff, int flags, int opcode); /** * Send RELEASE or RELEASEDIR request -- cgit v0.10.2 From 91fe96b403f8a0a4a8a045a39b1bd549b0da7941 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:37 +0200 Subject: fuse: create fuse_do_open() helper for CUSE Create a helper for sending an OPEN request that doesn't need a struct inode pointer. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index da87a3d..faa3b2f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1103,7 +1103,7 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c) static int fuse_dir_open(struct inode *inode, struct file *file) { - return fuse_open_common(inode, file, 1); + return fuse_open_common(inode, file, true); } static int fuse_dir_release(struct inode *inode, struct file *file) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d9458ca..6bdaa55 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -15,10 +15,9 @@ static const struct file_operations fuse_direct_io_file_operations; -static int fuse_send_open(struct inode *inode, struct file *file, int isdir, - struct fuse_open_out *outargp) +static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + int opcode, struct fuse_open_out *outargp) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_open_in inarg; struct fuse_req *req; int err; @@ -31,8 +30,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); if (!fc->atomic_o_trunc) inarg.flags &= ~O_TRUNC; - req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; - req->in.h.nodeid = get_node_id(inode); + req->in.h.opcode = opcode; + req->in.h.nodeid = nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -102,6 +101,35 @@ static void fuse_file_put(struct fuse_file *ff) } } +static int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + bool isdir) +{ + struct fuse_open_out outarg; + struct fuse_file *ff; + int err; + int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; + + ff = fuse_file_alloc(fc); + if (!ff) + return -ENOMEM; + + err = fuse_send_open(fc, nodeid, file, opcode, &outarg); + if (err) { + fuse_file_free(ff); + return err; + } + + if (isdir) + outarg.open_flags &= ~FOPEN_DIRECT_IO; + + ff->fh = outarg.fh; + ff->nodeid = nodeid; + ff->open_flags = outarg.open_flags; + file->private_data = fuse_file_get(ff); + + return 0; +} + void fuse_finish_open(struct inode *inode, struct file *file) { struct fuse_file *ff = file->private_data; @@ -114,11 +142,9 @@ void fuse_finish_open(struct inode *inode, struct file *file) nonseekable_open(inode, file); } -int fuse_open_common(struct inode *inode, struct file *file, int isdir) +int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_open_out outarg; - struct fuse_file *ff; int err; /* VFS checks this, but only _after_ ->open() */ @@ -129,24 +155,13 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) if (err) return err; - ff = fuse_file_alloc(fc); - if (!ff) - return -ENOMEM; - - err = fuse_send_open(inode, file, isdir, &outarg); + err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (err) - fuse_file_free(ff); - else { - if (isdir) - outarg.open_flags &= ~FOPEN_DIRECT_IO; - ff->fh = outarg.fh; - ff->nodeid = get_node_id(inode); - ff->open_flags = outarg.open_flags; - file->private_data = fuse_file_get(ff); - fuse_finish_open(inode, file); - } + return err; - return err; + fuse_finish_open(inode, file); + + return 0; } void fuse_release_fill(struct fuse_file *ff, int flags, int opcode) @@ -201,7 +216,7 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) static int fuse_open(struct inode *inode, struct file *file) { - return fuse_open_common(inode, file, 0); + return fuse_open_common(inode, file, false); } static int fuse_release(struct inode *inode, struct file *file) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ce46c12..429e669 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -528,7 +528,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, /** * Send OPEN or OPENDIR request */ -int fuse_open_common(struct inode *inode, struct file *file, int isdir); +int fuse_open_common(struct inode *inode, struct file *file, bool isdir); struct fuse_file *fuse_file_alloc(struct fuse_conn *fc); struct fuse_file *fuse_file_get(struct fuse_file *ff); -- cgit v0.10.2 From 8b0797a4984de4406de25808e1a424344de543e4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:39 +0200 Subject: fuse: don't use inode in fuse_sync_release() Make fuse_sync_release() a generic helper function that doesn't need a struct inode pointer. This makes it suitable for use by CUSE. Change return value of fuse_release_common() from int to void. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index faa3b2f..b3089a0 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } /* - * Synchronous release for the case when something goes wrong in CREATE_OPEN - */ -static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, - int flags) -{ - fuse_release_fill(ff, flags, FUSE_RELEASE); - ff->reserved_req->force = 1; - fuse_request_send(fc, ff->reserved_req); - fuse_put_request(fc, ff->reserved_req); - kfree(ff); -} - -/* * Atomic create+open operation * * If the filesystem doesn't support this, then fall back to separate @@ -452,7 +439,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, &outentry.attr, entry_attr_timeout(&outentry), 0); if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); - fuse_sync_release(fc, ff, flags); + fuse_sync_release(ff, flags); fuse_send_forget(fc, forget_req, outentry.nodeid, 1); return -ENOMEM; } @@ -462,7 +449,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_invalidate_attr(dir); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { - fuse_sync_release(fc, ff, flags); + fuse_sync_release(ff, flags); return PTR_ERR(file); } file->private_data = fuse_file_get(ff); @@ -1108,7 +1095,9 @@ static int fuse_dir_open(struct inode *inode, struct file *file) static int fuse_dir_release(struct inode *inode, struct file *file) { - return fuse_release_common(inode, file, 1); + fuse_release_common(file, FUSE_RELEASEDIR); + + return 0; } static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 6bdaa55..ab627b4 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -93,10 +93,9 @@ static void fuse_file_put(struct fuse_file *ff) { if (atomic_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - struct inode *inode = req->misc.release.path.dentry->d_inode; - struct fuse_conn *fc = get_fuse_conn(inode); + req->end = fuse_release_end; - fuse_request_send_background(fc, req); + fuse_request_send_background(ff->fc, req); kfree(ff); } } @@ -164,11 +163,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) return 0; } -void fuse_release_fill(struct fuse_file *ff, int flags, int opcode) +static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) { + struct fuse_conn *fc = ff->fc; struct fuse_req *req = ff->reserved_req; struct fuse_release_in *inarg = &req->misc.release.in; + spin_lock(&fc->lock); + list_del(&ff->write_entry); + if (!RB_EMPTY_NODE(&ff->polled_node)) + rb_erase(&ff->polled_node, &fc->polled_files); + spin_unlock(&fc->lock); + + wake_up_interruptible_sync(&ff->poll_wait); + inarg->fh = ff->fh; inarg->flags = flags; req->in.h.opcode = opcode; @@ -178,40 +186,28 @@ void fuse_release_fill(struct fuse_file *ff, int flags, int opcode) req->in.args[0].value = inarg; } -int fuse_release_common(struct inode *inode, struct file *file, int isdir) +void fuse_release_common(struct file *file, int opcode) { - struct fuse_conn *fc; struct fuse_file *ff; struct fuse_req *req; ff = file->private_data; if (unlikely(!ff)) - return 0; /* return value is ignored by VFS */ + return; - fc = get_fuse_conn(inode); req = ff->reserved_req; - - fuse_release_fill(ff, file->f_flags, - isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); + fuse_prepare_release(ff, file->f_flags, opcode); /* Hold vfsmount and dentry until release is finished */ path_get(&file->f_path); req->misc.release.path = file->f_path; - spin_lock(&fc->lock); - list_del(&ff->write_entry); - if (!RB_EMPTY_NODE(&ff->polled_node)) - rb_erase(&ff->polled_node, &fc->polled_files); - spin_unlock(&fc->lock); - - wake_up_interruptible_sync(&ff->poll_wait); /* * Normally this will send the RELEASE request, however if * some asynchronous READ or WRITE requests are outstanding, * the sending will be delayed. */ fuse_file_put(ff); - return 0; } static int fuse_open(struct inode *inode, struct file *file) @@ -221,7 +217,20 @@ static int fuse_open(struct inode *inode, struct file *file) static int fuse_release(struct inode *inode, struct file *file) { - return fuse_release_common(inode, file, 0); + fuse_release_common(file, FUSE_RELEASE); + + /* return value is ignored by VFS */ + return 0; +} + +void fuse_sync_release(struct fuse_file *ff, int flags) +{ + WARN_ON(atomic_read(&ff->count) > 1); + fuse_prepare_release(ff, flags, FUSE_RELEASE); + ff->reserved_req->force = 1; + fuse_request_send(ff->fc, ff->reserved_req); + fuse_put_request(ff->fc, ff->reserved_req); + kfree(ff); } /* diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 429e669..ef2e1f3 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -535,13 +535,12 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff); void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file); -/** Fill in ff->reserved_req with a RELEASE request */ -void fuse_release_fill(struct fuse_file *ff, int flags, int opcode); +void fuse_sync_release(struct fuse_file *ff, int flags); /** * Send RELEASE or RELEASEDIR request */ -int fuse_release_common(struct inode *inode, struct file *file, int isdir); +void fuse_release_common(struct file *file, int opcode); /** * Send FSYNC or FSYNCDIR request -- cgit v0.10.2 From d36f248710c05714f37d921258b630bd1456b99f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:39 +0200 Subject: fuse: don't use inode in fuse_do_ioctl() helper Create a helper for sending an IOCTL request that doesn't use a struct inode. This prepares this function for use by CUSE, where the inode is not owned by a fuse filesystem. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ab627b4..9f0ade0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1647,12 +1647,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, * limits ioctl data transfers to well-formed ioctls and is the forced * behavior for all FUSE servers. */ -static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int flags) +static long fuse_do_ioctl(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags) { - struct inode *inode = file->f_dentry->d_inode; struct fuse_file *ff = file->private_data; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_conn *fc = ff->fc; struct fuse_ioctl_in inarg = { .fh = ff->fh, .cmd = cmd, @@ -1671,13 +1670,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, /* assume all the iovs returned by client always fits in a page */ BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); - if (!fuse_allow_task(fc, current)) - return -EACCES; - - err = -EIO; - if (is_bad_inode(inode)) - goto out; - err = -ENOMEM; pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL); iov_page = alloc_page(GFP_KERNEL); @@ -1738,7 +1730,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, /* okay, let's send it to the client */ req->in.h.opcode = FUSE_IOCTL; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1822,16 +1814,31 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd, return err ? err : outarg.result; } +static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fuse_allow_task(fc, current)) + return -EACCES; + + if (is_bad_inode(inode)) + return -EIO; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + static long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_do_ioctl(file, cmd, arg, 0); + return fuse_file_ioctl_common(file, cmd, arg, 0); } static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT); + return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); } /* -- cgit v0.10.2 From 797759aaf31351a1ab1b9130c4f180ce496f46c5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 28 Apr 2009 16:56:41 +0200 Subject: fuse: don't use inode in fuse_file_poll Use ff->fc and ff->nodeid instead of file->f_dentry->d_inode in the fuse_file_poll() implementation. This prepares this function for use by CUSE, where the inode is not owned by a fuse filesystem. Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9f0ade0..c5de60e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1894,9 +1894,8 @@ static void fuse_register_polled_file(struct fuse_conn *fc, static unsigned fuse_file_poll(struct file *file, poll_table *wait) { - struct inode *inode = file->f_dentry->d_inode; struct fuse_file *ff = file->private_data; - struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_conn *fc = ff->fc; struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; struct fuse_poll_out outarg; struct fuse_req *req; @@ -1921,7 +1920,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) return PTR_ERR(req); req->in.h.opcode = FUSE_POLL; - req->in.h.nodeid = get_node_id(inode); + req->in.h.nodeid = ff->nodeid; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; -- cgit v0.10.2 From a325f9b92273d6c64ec56167905b951b9827ec33 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Apr 2009 10:54:52 +0900 Subject: fuse: update fuse_conn_init() and separate out fuse_conn_kill() Update fuse_conn_init() such that it doesn't take @sb and move bdi registration into a separate function. Also separate out fuse_conn_kill() from fuse_put_super(). These will be used to implement cuse. Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index ba76b68..368189f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -282,7 +282,7 @@ __releases(&fc->lock) wake_up_all(&fc->blocked_waitq); } if (fc->num_background == FUSE_CONGESTION_THRESHOLD && - fc->connected) { + fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); } @@ -408,7 +408,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, fc->num_background++; if (fc->num_background == FUSE_MAX_BACKGROUND) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD) { + if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ef2e1f3..2efcf12 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -394,6 +394,9 @@ struct fuse_conn { /** Filesystem supports NFS exporting. Only set in INIT */ unsigned export_support:1; + /** Set if bdi is valid */ + unsigned bdi_initialized:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -662,7 +665,7 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); /** * Initialize fuse_conn */ -int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb); +void fuse_conn_init(struct fuse_conn *fc); /** * Release reference to fuse_conn diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 75ca5ac..fea7c10 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -274,11 +274,14 @@ static void fuse_send_destroy(struct fuse_conn *fc) } } -static void fuse_put_super(struct super_block *sb) +static void fuse_bdi_destroy(struct fuse_conn *fc) { - struct fuse_conn *fc = get_fuse_conn_super(sb); + if (fc->bdi_initialized) + bdi_destroy(&fc->bdi); +} - fuse_send_destroy(fc); +static void fuse_conn_kill(struct fuse_conn *fc) +{ spin_lock(&fc->lock); fc->connected = 0; fc->blocked = 0; @@ -292,7 +295,15 @@ static void fuse_put_super(struct super_block *sb) list_del(&fc->entry); fuse_ctl_remove_conn(fc); mutex_unlock(&fuse_mutex); - bdi_destroy(&fc->bdi); + fuse_bdi_destroy(fc); +} + +static void fuse_put_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + fuse_send_destroy(fc); + fuse_conn_kill(fc); fuse_conn_put(fc); } @@ -463,10 +474,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) +void fuse_conn_init(struct fuse_conn *fc) { - int err; - memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); @@ -481,49 +490,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); - fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; - fc->bdi.unplug_io_fn = default_unplug_io_fn; - /* fuse does it's own writeback accounting */ - fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; fc->khctr = 0; fc->polled_files = RB_ROOT; - fc->dev = sb->s_dev; - err = bdi_init(&fc->bdi); - if (err) - goto error_mutex_destroy; - if (sb->s_bdev) { - err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", - MAJOR(fc->dev), MINOR(fc->dev)); - } else { - err = bdi_register_dev(&fc->bdi, fc->dev); - } - if (err) - goto error_bdi_destroy; - /* - * For a single fuse filesystem use max 1% of dirty + - * writeback threshold. - * - * This gives about 1M of write buffer for memory maps on a - * machine with 1G and 10% dirty_ratio, which should be more - * than enough. - * - * Privileged users can raise it by writing to - * - * /sys/class/bdi//max_ratio - */ - bdi_set_max_ratio(&fc->bdi, 1); fc->reqctr = 0; fc->blocked = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); - - return 0; - - error_bdi_destroy: - bdi_destroy(&fc->bdi); - error_mutex_destroy: - mutex_destroy(&fc->inst_mutex); - return err; } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -794,6 +766,48 @@ static void fuse_free_conn(struct fuse_conn *fc) kfree(fc); } +static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) +{ + int err; + + fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + fc->bdi.unplug_io_fn = default_unplug_io_fn; + /* fuse does it's own writeback accounting */ + fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB; + + err = bdi_init(&fc->bdi); + if (err) + return err; + + fc->bdi_initialized = 1; + + if (sb->s_bdev) { + err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk", + MAJOR(fc->dev), MINOR(fc->dev)); + } else { + err = bdi_register_dev(&fc->bdi, fc->dev); + } + + if (err) + return err; + + /* + * For a single fuse filesystem use max 1% of dirty + + * writeback threshold. + * + * This gives about 1M of write buffer for memory maps on a + * machine with 1G and 10% dirty_ratio, which should be more + * than enough. + * + * Privileged users can raise it by writing to + * + * /sys/class/bdi//max_ratio + */ + bdi_set_max_ratio(&fc->bdi, 1); + + return 0; +} + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { struct fuse_conn *fc; @@ -840,11 +854,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!fc) goto err_fput; - err = fuse_conn_init(fc, sb); - if (err) { - kfree(fc); - goto err_fput; - } + fuse_conn_init(fc); + + fc->dev = sb->s_dev; + err = fuse_bdi_init(fc, sb); + if (err) + goto err_put_conn; fc->release = fuse_free_conn; fc->flags = d.flags; @@ -908,7 +923,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) err_put_root: dput(root_dentry); err_put_conn: - bdi_destroy(&fc->bdi); + fuse_bdi_destroy(fc); fuse_conn_put(fc); err_fput: fput(file); -- cgit v0.10.2 From 08cbf542bf24fb0481a54526b177347ae4046f5e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Apr 2009 10:54:53 +0900 Subject: fuse: export symbols to be used by CUSE Export the following symbols for CUSE. fuse_conn_put() fuse_conn_get() fuse_conn_kill() fuse_send_init() fuse_do_open() fuse_sync_release() fuse_direct_io() fuse_do_ioctl() fuse_file_poll() fuse_request_alloc() fuse_get_req() fuse_put_request() fuse_request_send() fuse_abort_conn() fuse_dev_release() fuse_dev_operations Signed-off-by: Tejun Heo Signed-off-by: Miklos Szeredi diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 368189f..8fed2ed 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void) fuse_request_init(req); return req; } +EXPORT_SYMBOL_GPL(fuse_request_alloc); struct fuse_req *fuse_request_alloc_nofs(void) { @@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) atomic_dec(&fc->num_waiting); return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(fuse_get_req); /* * Return request in fuse_file->reserved_req. However that may @@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) fuse_request_free(req); } } +EXPORT_SYMBOL_GPL(fuse_put_request); static unsigned len_args(unsigned numargs, struct fuse_arg *args) { @@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) } spin_unlock(&fc->lock); } +EXPORT_SYMBOL_GPL(fuse_request_send); static void fuse_request_send_nowait_locked(struct fuse_conn *fc, struct fuse_req *req) @@ -440,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req) req->isreply = 1; fuse_request_send_nowait(fc, req); } +EXPORT_SYMBOL_GPL(fuse_request_send_background); /* * Called under fc->lock @@ -1106,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc) } spin_unlock(&fc->lock); } +EXPORT_SYMBOL_GPL(fuse_abort_conn); -static int fuse_dev_release(struct inode *inode, struct file *file) +int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_conn *fc = fuse_get_conn(file); if (fc) { @@ -1121,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file) return 0; } +EXPORT_SYMBOL_GPL(fuse_dev_release); static int fuse_dev_fasync(int fd, struct file *file, int on) { @@ -1143,6 +1150,7 @@ const struct file_operations fuse_dev_operations = { .release = fuse_dev_release, .fasync = fuse_dev_fasync, }; +EXPORT_SYMBOL_GPL(fuse_dev_operations); static struct miscdevice fuse_miscdevice = { .minor = FUSE_MINOR, diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c5de60e..fce6ce6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -12,6 +12,7 @@ #include #include #include +#include static const struct file_operations fuse_direct_io_file_operations; @@ -100,8 +101,8 @@ static void fuse_file_put(struct fuse_file *ff) } } -static int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, - bool isdir) +int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + bool isdir) { struct fuse_open_out outarg; struct fuse_file *ff; @@ -128,6 +129,7 @@ static int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, return 0; } +EXPORT_SYMBOL_GPL(fuse_do_open); void fuse_finish_open(struct inode *inode, struct file *file) { @@ -232,6 +234,7 @@ void fuse_sync_release(struct fuse_file *ff, int flags) fuse_put_request(ff->fc, ff->reserved_req); kfree(ff); } +EXPORT_SYMBOL_GPL(fuse_sync_release); /* * Scramble the ID space with XTEA, so that the value of the files_struct @@ -1009,8 +1012,8 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, return 0; } -static ssize_t fuse_direct_io(struct file *file, const char __user *buf, - size_t count, loff_t *ppos, int write) +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -1066,6 +1069,7 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf, return res; } +EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t fuse_direct_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1647,8 +1651,8 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, * limits ioctl data transfers to well-formed ioctls and is the forced * behavior for all FUSE servers. */ -static long fuse_do_ioctl(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int flags) +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -1813,6 +1817,7 @@ static long fuse_do_ioctl(struct file *file, unsigned int cmd, return err ? err : outarg.result; } +EXPORT_SYMBOL_GPL(fuse_do_ioctl); static long fuse_file_ioctl_common(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags) @@ -1892,7 +1897,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc, spin_unlock(&fc->lock); } -static unsigned fuse_file_poll(struct file *file, poll_table *wait) +unsigned fuse_file_poll(struct file *file, poll_table *wait) { struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; @@ -1939,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait) } return POLLERR; } +EXPORT_SYMBOL_GPL(fuse_file_poll); /* * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2efcf12..aaf2f9f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -261,6 +261,8 @@ struct fuse_req { } release; struct fuse_init_in init_in; struct fuse_init_out init_out; + struct cuse_init_in cuse_init_in; + struct cuse_init_out cuse_init_out; struct { struct fuse_read_in in; u64 attr_ver; @@ -662,6 +664,8 @@ void fuse_invalidate_entry_cache(struct dentry *entry); */ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); +void fuse_conn_kill(struct fuse_conn *fc); + /** * Initialize fuse_conn */ @@ -704,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, + bool isdir); +ssize_t fuse_direct_io(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, int write); +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags); +unsigned fuse_file_poll(struct file *file, poll_table *wait); +int fuse_dev_release(struct inode *inode, struct file *file); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index fea7c10..d8673cc 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -280,7 +280,7 @@ static void fuse_bdi_destroy(struct fuse_conn *fc) bdi_destroy(&fc->bdi); } -static void fuse_conn_kill(struct fuse_conn *fc) +void fuse_conn_kill(struct fuse_conn *fc) { spin_lock(&fc->lock); fc->connected = 0; @@ -297,6 +297,7 @@ static void fuse_conn_kill(struct fuse_conn *fc) mutex_unlock(&fuse_mutex); fuse_bdi_destroy(fc); } +EXPORT_SYMBOL_GPL(fuse_conn_kill); static void fuse_put_super(struct super_block *sb) { @@ -508,12 +509,14 @@ void fuse_conn_put(struct fuse_conn *fc) fc->release(fc); } } +EXPORT_SYMBOL_GPL(fuse_conn_put); struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) { atomic_inc(&fc->count); return fc; } +EXPORT_SYMBOL_GPL(fuse_conn_get); static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) { -- cgit v0.10.2 From cd891ae0305601bdb4d2e7e85282961c4ff256cd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Apr 2009 11:39:34 -0400 Subject: tracing: convert ftrace_dump spinlocks to raw ftrace_dump is used for printing out the contents of the ftrace ring buffer to the console on failure. Currently it uses a spinlock to synchronize the output from multiple failures on different CPUs. This spin lock currently is a normal spinlock and can cause issues with lockdep and lock tracing. This patch converts it to raw since it is for error handling only. The lock is local to the ftrace_dump and is not used by any other infrastructure. [ Impact: prevent ftrace_dump from locking up by internal tracing ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b6183bc..5d704a4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4114,7 +4114,8 @@ trace_printk_seq(struct trace_seq *s) static void __ftrace_dump(bool disable_tracing) { - static DEFINE_SPINLOCK(ftrace_dump_lock); + static raw_spinlock_t ftrace_dump_lock = + (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; /* use static because iter can be a bit big for the stack */ static struct trace_iterator iter; unsigned int old_userobj; @@ -4123,7 +4124,8 @@ static void __ftrace_dump(bool disable_tracing) int cnt = 0, cpu; /* only one dump */ - spin_lock_irqsave(&ftrace_dump_lock, flags); + local_irq_save(flags); + __raw_spin_lock(&ftrace_dump_lock); if (dump_ran) goto out; @@ -4195,7 +4197,8 @@ static void __ftrace_dump(bool disable_tracing) } out: - spin_unlock_irqrestore(&ftrace_dump_lock, flags); + __raw_spin_unlock(&ftrace_dump_lock); + local_irq_restore(flags); } /* By default: disable tracing after the dump */ -- cgit v0.10.2 From edc953fa4ebc0265ef3b1754fe116a9fd4264e15 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 28 Apr 2009 11:13:46 -0400 Subject: x86: clean up alternative.h Alternative header duplicates assembly that could be merged in one single macro. Merging this into this macro also allows to directly declare ALTERNATIVE() statements within assembly code. Uses a __stringify() of the feature bits rather than passing a "i" operand. Leave the old %0 operand as-is (set to 0), unused to stay compatible with API. (v2: tab alignment fixes) [ Impact: cleanup ] Signed-off-by: Mathieu Desnoyers LKML-Reference: <20090428151346.GA31212@Krystal> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index f6aa18e..1a37bcd 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -3,6 +3,7 @@ #include #include +#include #include /* @@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {} const unsigned char *const *find_nop_table(void); +/* alternative assembly primitive: */ +#define ALTERNATIVE(oldinstr, newinstr, feature) \ + \ + "661:\n\t" oldinstr "\n662:\n" \ + ".section .altinstructions,\"a\"\n" \ + _ASM_ALIGN "\n" \ + _ASM_PTR "661b\n" /* label */ \ + _ASM_PTR "663f\n" /* new instruction */ \ + " .byte " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* sourcelen */ \ + " .byte 664f-663f\n" /* replacementlen */ \ + ".previous\n" \ + ".section .altinstr_replacement, \"ax\"\n" \ + "663:\n\t" newinstr "\n664:\n" /* replacement */ \ + ".previous" + /* * Alternative instructions for different CPU types or capabilities. * @@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void); * without volatile and memory clobber. */ #define alternative(oldinstr, newinstr, feature) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature) : "memory") + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") /* * Alternative inline assembly with input. @@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void); * Best is to use constraints that are fixed size (like (%1) ... "r") * If you use variable sized constraints like "m" or "g" in the * replacement make sure to pad to the worst case length. + * Leaving an unused argument 0 to keep API compatibility. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c0\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" :: "i" (feature), ##input) + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ #define alternative_io(oldinstr, newinstr, feature, output, input...) \ - asm volatile ("661:\n\t" oldinstr "\n662:\n" \ - ".section .altinstructions,\"a\"\n" \ - _ASM_ALIGN "\n" \ - _ASM_PTR "661b\n" /* label */ \ - _ASM_PTR "663f\n" /* new instruction */ \ - " .byte %c[feat]\n" /* feature bit */ \ - " .byte 662b-661b\n" /* sourcelen */ \ - " .byte 664f-663f\n" /* replacementlen */ \ - ".previous\n" \ - ".section .altinstr_replacement,\"ax\"\n" \ - "663:\n\t" newinstr "\n664:\n" /* replacement */ \ - ".previous" : output : [feat] "i" (feature), ##input) + asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ + : output : "i" (0), ## input) /* * use this macro(s) if you need more than one output parameter -- cgit v0.10.2 From fa59440d0c7b5a2bcdc9e35f25fdac693e54c86a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 23 Apr 2009 10:19:58 +0100 Subject: [ARM] 5470/1: U300 register definitions This adds registers, interrupt numbers and IO mappings for the U300 series platforms core support, including basic block offsets and registers definitions for the system controller. Signed-off-by: Linus Walleij Signed-off-by: Russell King diff --git a/arch/arm/mach-u300/include/mach/hardware.h b/arch/arm/mach-u300/include/mach/hardware.h new file mode 100644 index 0000000..b99d4ce --- /dev/null +++ b/arch/arm/mach-u300/include/mach/hardware.h @@ -0,0 +1,5 @@ +/* + * arch/arm/mach-u300/include/mach/hardware.h + */ +#include +#include diff --git a/arch/arm/mach-u300/include/mach/io.h b/arch/arm/mach-u300/include/mach/io.h new file mode 100644 index 0000000..5d6b4c1 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/io.h @@ -0,0 +1,20 @@ +/* + * + * arch/arm/mach-u300/include/mach/io.h + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Dummy IO map for being able to use writew()/readw(), + * writel()/readw() and similar accessor functions. + * Author: Linus Walleij + */ +#ifndef __MACH_IO_H +#define __MACH_IO_H + +#define IO_SPACE_LIMIT 0xffffffff + +#define __io(a) __typesafe_io(a) +#define __mem_pci(a) (a) + +#endif diff --git a/arch/arm/mach-u300/include/mach/irqs.h b/arch/arm/mach-u300/include/mach/irqs.h new file mode 100644 index 0000000..a6867b1 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/irqs.h @@ -0,0 +1,114 @@ +/* + * + * arch/arm/mach-u300/include/mach/irqs.h + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * IRQ channel definitions for the U300 platforms. + * Author: Linus Walleij + */ + +#ifndef __MACH_IRQS_H +#define __MACH_IRQS_H + +#define IRQ_U300_INTCON0_START 0 +#define IRQ_U300_INTCON1_START 32 +/* These are on INTCON0 - 30 lines */ +#define IRQ_U300_IRQ0_EXT 0 +#define IRQ_U300_IRQ1_EXT 1 +#define IRQ_U300_DMA 2 +#define IRQ_U300_VIDEO_ENC_0 3 +#define IRQ_U300_VIDEO_ENC_1 4 +#define IRQ_U300_AAIF_RX 5 +#define IRQ_U300_AAIF_TX 6 +#define IRQ_U300_AAIF_VGPIO 7 +#define IRQ_U300_AAIF_WAKEUP 8 +#define IRQ_U300_PCM_I2S0_FRAME 9 +#define IRQ_U300_PCM_I2S0_FIFO 10 +#define IRQ_U300_PCM_I2S1_FRAME 11 +#define IRQ_U300_PCM_I2S1_FIFO 12 +#define IRQ_U300_XGAM_GAMCON 13 +#define IRQ_U300_XGAM_CDI 14 +#define IRQ_U300_XGAM_CDICON 15 +#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330) +/* MMIACC not used on the DB3210 or DB3350 chips */ +#define IRQ_U300_XGAM_MMIACC 16 +#endif +#define IRQ_U300_XGAM_PDI 17 +#define IRQ_U300_XGAM_PDICON 18 +#define IRQ_U300_XGAM_GAMEACC 19 +#define IRQ_U300_XGAM_MCIDCT 20 +#define IRQ_U300_APEX 21 +#define IRQ_U300_UART0 22 +#define IRQ_U300_SPI 23 +#define IRQ_U300_TIMER_APP_OS 24 +#define IRQ_U300_TIMER_APP_DD 25 +#define IRQ_U300_TIMER_APP_GP1 26 +#define IRQ_U300_TIMER_APP_GP2 27 +#define IRQ_U300_TIMER_OS 28 +#define IRQ_U300_TIMER_MS 29 +#define IRQ_U300_KEYPAD_KEYBF 30 +#define IRQ_U300_KEYPAD_KEYBR 31 +/* These are on INTCON1 - 32 lines */ +#define IRQ_U300_GPIO_PORT0 32 +#define IRQ_U300_GPIO_PORT1 33 +#define IRQ_U300_GPIO_PORT2 34 + +#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330) || \ + defined(CONFIG_MACH_U300_BS335) +/* These are for DB3150, DB3200 and DB3350 */ +#define IRQ_U300_WDOG 35 +#define IRQ_U300_EVHIST 36 +#define IRQ_U300_MSPRO 37 +#define IRQ_U300_MMCSD_MCIINTR0 38 +#define IRQ_U300_MMCSD_MCIINTR1 39 +#define IRQ_U300_I2C0 40 +#define IRQ_U300_I2C1 41 +#define IRQ_U300_RTC 42 +#define IRQ_U300_NFIF 43 +#define IRQ_U300_NFIF2 44 +#endif + +/* DB3150 and DB3200 have only 45 IRQs */ +#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330) +#define U300_NR_IRQS 45 +#endif + +/* The DB3350-specific interrupt lines */ +#ifdef CONFIG_MACH_U300_BS335 +#define IRQ_U300_ISP_F0 45 +#define IRQ_U300_ISP_F1 46 +#define IRQ_U300_ISP_F2 47 +#define IRQ_U300_ISP_F3 48 +#define IRQ_U300_ISP_F4 49 +#define IRQ_U300_GPIO_PORT3 50 +#define IRQ_U300_SYSCON_PLL_LOCK 51 +#define IRQ_U300_UART1 52 +#define IRQ_U300_GPIO_PORT4 53 +#define IRQ_U300_GPIO_PORT5 54 +#define IRQ_U300_GPIO_PORT6 55 +#define U300_NR_IRQS 56 +#endif + +/* The DB3210-specific interrupt lines */ +#ifdef CONFIG_MACH_U300_BS365 +#define IRQ_U300_GPIO_PORT3 35 +#define IRQ_U300_GPIO_PORT4 36 +#define IRQ_U300_WDOG 37 +#define IRQ_U300_EVHIST 38 +#define IRQ_U300_MSPRO 39 +#define IRQ_U300_MMCSD_MCIINTR0 40 +#define IRQ_U300_MMCSD_MCIINTR1 41 +#define IRQ_U300_I2C0 42 +#define IRQ_U300_I2C1 43 +#define IRQ_U300_RTC 44 +#define IRQ_U300_NFIF 45 +#define IRQ_U300_NFIF2 46 +#define IRQ_U300_SYSCON_PLL_LOCK 47 +#define U300_NR_IRQS 48 +#endif + +#define NR_IRQS U300_NR_IRQS + +#endif diff --git a/arch/arm/mach-u300/include/mach/syscon.h b/arch/arm/mach-u300/include/mach/syscon.h new file mode 100644 index 0000000..1c90d1b --- /dev/null +++ b/arch/arm/mach-u300/include/mach/syscon.h @@ -0,0 +1,644 @@ +/* + * + * arch/arm/mach-u300/include/mach/syscon.h + * + * + * Copyright (C) 2008 ST-Ericsson AB + * + * Author: Rickard Andersson + */ + +#ifndef __MACH_SYSCON_H +#define __MACH_SYSCON_H + +/* + * All register defines for SYSCON registers that concerns individual + * block clocks and reset lines are registered here. This is because + * we don't want any other file to try to fool around with this stuff. + */ + +/* APP side SYSCON registers */ +/* TODO: this is incomplete. Add all from asic_syscon_map.h eventually. */ +/* CLK Control Register 16bit (R/W) */ +#define U300_SYSCON_CCR (0x0000) +#define U300_SYSCON_CCR_I2S1_USE_VCXO (0x0040) +#define U300_SYSCON_CCR_I2S0_USE_VCXO (0x0020) +#define U300_SYSCON_CCR_TURN_VCXO_ON (0x0008) +#define U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK (0x0007) +#define U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER (0x04) +#define U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW (0x03) +#define U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE (0x02) +#define U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH (0x01) +#define U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST (0x00) +/* CLK Status Register 16bit (R/W) */ +#define U300_SYSCON_CSR (0x0004) +#define U300_SYSCON_CSR_PLL208_LOCK_IND (0x0002) +#define U300_SYSCON_CSR_PLL13_LOCK_IND (0x0001) +/* Reset lines for SLOW devices 16bit (R/W) */ +#define U300_SYSCON_RSR (0x0014) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_RSR_PPM_RESET_EN (0x0200) +#endif +#define U300_SYSCON_RSR_ACC_TMR_RESET_EN (0x0100) +#define U300_SYSCON_RSR_APP_TMR_RESET_EN (0x0080) +#define U300_SYSCON_RSR_RTC_RESET_EN (0x0040) +#define U300_SYSCON_RSR_KEYPAD_RESET_EN (0x0020) +#define U300_SYSCON_RSR_GPIO_RESET_EN (0x0010) +#define U300_SYSCON_RSR_EH_RESET_EN (0x0008) +#define U300_SYSCON_RSR_BTR_RESET_EN (0x0004) +#define U300_SYSCON_RSR_UART_RESET_EN (0x0002) +#define U300_SYSCON_RSR_SLOW_BRIDGE_RESET_EN (0x0001) +/* Reset lines for FAST devices 16bit (R/W) */ +#define U300_SYSCON_RFR (0x0018) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_RFR_UART1_RESET_ENABLE (0x0080) +#endif +#define U300_SYSCON_RFR_SPI_RESET_ENABLE (0x0040) +#define U300_SYSCON_RFR_MMC_RESET_ENABLE (0x0020) +#define U300_SYSCON_RFR_PCM_I2S1_RESET_ENABLE (0x0010) +#define U300_SYSCON_RFR_PCM_I2S0_RESET_ENABLE (0x0008) +#define U300_SYSCON_RFR_I2C1_RESET_ENABLE (0x0004) +#define U300_SYSCON_RFR_I2C0_RESET_ENABLE (0x0002) +#define U300_SYSCON_RFR_FAST_BRIDGE_RESET_ENABLE (0x0001) +/* Reset lines for the rest of the peripherals 16bit (R/W) */ +#define U300_SYSCON_RRR (0x001c) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_RRR_CDS_RESET_EN (0x4000) +#define U300_SYSCON_RRR_ISP_RESET_EN (0x2000) +#endif +#define U300_SYSCON_RRR_INTCON_RESET_EN (0x1000) +#define U300_SYSCON_RRR_MSPRO_RESET_EN (0x0800) +#define U300_SYSCON_RRR_XGAM_RESET_EN (0x0100) +#define U300_SYSCON_RRR_XGAM_VC_SYNC_RESET_EN (0x0080) +#define U300_SYSCON_RRR_NANDIF_RESET_EN (0x0040) +#define U300_SYSCON_RRR_EMIF_RESET_EN (0x0020) +#define U300_SYSCON_RRR_DMAC_RESET_EN (0x0010) +#define U300_SYSCON_RRR_CPU_RESET_EN (0x0008) +#define U300_SYSCON_RRR_APEX_RESET_EN (0x0004) +#define U300_SYSCON_RRR_AHB_RESET_EN (0x0002) +#define U300_SYSCON_RRR_AAIF_RESET_EN (0x0001) +/* Clock enable for SLOW peripherals 16bit (R/W) */ +#define U300_SYSCON_CESR (0x0020) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_CESR_PPM_CLK_EN (0x0200) +#endif +#define U300_SYSCON_CESR_ACC_TMR_CLK_EN (0x0100) +#define U300_SYSCON_CESR_APP_TMR_CLK_EN (0x0080) +#define U300_SYSCON_CESR_KEYPAD_CLK_EN (0x0040) +#define U300_SYSCON_CESR_GPIO_CLK_EN (0x0010) +#define U300_SYSCON_CESR_EH_CLK_EN (0x0008) +#define U300_SYSCON_CESR_BTR_CLK_EN (0x0004) +#define U300_SYSCON_CESR_UART_CLK_EN (0x0002) +#define U300_SYSCON_CESR_SLOW_BRIDGE_CLK_EN (0x0001) +/* Clock enable for FAST peripherals 16bit (R/W) */ +#define U300_SYSCON_CEFR (0x0024) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_CEFR_UART1_CLK_EN (0x0200) +#endif +#define U300_SYSCON_CEFR_I2S1_CORE_CLK_EN (0x0100) +#define U300_SYSCON_CEFR_I2S0_CORE_CLK_EN (0x0080) +#define U300_SYSCON_CEFR_SPI_CLK_EN (0x0040) +#define U300_SYSCON_CEFR_MMC_CLK_EN (0x0020) +#define U300_SYSCON_CEFR_I2S1_CLK_EN (0x0010) +#define U300_SYSCON_CEFR_I2S0_CLK_EN (0x0008) +#define U300_SYSCON_CEFR_I2C1_CLK_EN (0x0004) +#define U300_SYSCON_CEFR_I2C0_CLK_EN (0x0002) +#define U300_SYSCON_CEFR_FAST_BRIDGE_CLK_EN (0x0001) +/* Clock enable for the rest of the peripherals 16bit (R/W) */ +#define U300_SYSCON_CERR (0x0028) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_CERR_CDS_CLK_EN (0x2000) +#define U300_SYSCON_CERR_ISP_CLK_EN (0x1000) +#endif +#define U300_SYSCON_CERR_MSPRO_CLK_EN (0x0800) +#define U300_SYSCON_CERR_AHB_SUBSYS_BRIDGE_CLK_EN (0x0400) +#define U300_SYSCON_CERR_SEMI_CLK_EN (0x0200) +#define U300_SYSCON_CERR_XGAM_CLK_EN (0x0100) +#define U300_SYSCON_CERR_VIDEO_ENC_CLK_EN (0x0080) +#define U300_SYSCON_CERR_NANDIF_CLK_EN (0x0040) +#define U300_SYSCON_CERR_EMIF_CLK_EN (0x0020) +#define U300_SYSCON_CERR_DMAC_CLK_EN (0x0010) +#define U300_SYSCON_CERR_CPU_CLK_EN (0x0008) +#define U300_SYSCON_CERR_APEX_CLK_EN (0x0004) +#define U300_SYSCON_CERR_AHB_CLK_EN (0x0002) +#define U300_SYSCON_CERR_AAIF_CLK_EN (0x0001) +/* Single block clock enable 16bit (-/W) */ +#define U300_SYSCON_SBCER (0x002c) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_SBCER_PPM_CLK_EN (0x0009) +#endif +#define U300_SYSCON_SBCER_ACC_TMR_CLK_EN (0x0008) +#define U300_SYSCON_SBCER_APP_TMR_CLK_EN (0x0007) +#define U300_SYSCON_SBCER_KEYPAD_CLK_EN (0x0006) +#define U300_SYSCON_SBCER_GPIO_CLK_EN (0x0004) +#define U300_SYSCON_SBCER_EH_CLK_EN (0x0003) +#define U300_SYSCON_SBCER_BTR_CLK_EN (0x0002) +#define U300_SYSCON_SBCER_UART_CLK_EN (0x0001) +#define U300_SYSCON_SBCER_SLOW_BRIDGE_CLK_EN (0x0000) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_SBCER_UART1_CLK_EN (0x0019) +#endif +#define U300_SYSCON_SBCER_I2S1_CORE_CLK_EN (0x0018) +#define U300_SYSCON_SBCER_I2S0_CORE_CLK_EN (0x0017) +#define U300_SYSCON_SBCER_SPI_CLK_EN (0x0016) +#define U300_SYSCON_SBCER_MMC_CLK_EN (0x0015) +#define U300_SYSCON_SBCER_I2S1_CLK_EN (0x0014) +#define U300_SYSCON_SBCER_I2S0_CLK_EN (0x0013) +#define U300_SYSCON_SBCER_I2C1_CLK_EN (0x0012) +#define U300_SYSCON_SBCER_I2C0_CLK_EN (0x0011) +#define U300_SYSCON_SBCER_FAST_BRIDGE_CLK_EN (0x0010) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_SBCER_CDS_CLK_EN (0x002D) +#define U300_SYSCON_SBCER_ISP_CLK_EN (0x002C) +#endif +#define U300_SYSCON_SBCER_MSPRO_CLK_EN (0x002B) +#define U300_SYSCON_SBCER_AHB_SUBSYS_BRIDGE_CLK_EN (0x002A) +#define U300_SYSCON_SBCER_SEMI_CLK_EN (0x0029) +#define U300_SYSCON_SBCER_XGAM_CLK_EN (0x0028) +#define U300_SYSCON_SBCER_VIDEO_ENC_CLK_EN (0x0027) +#define U300_SYSCON_SBCER_NANDIF_CLK_EN (0x0026) +#define U300_SYSCON_SBCER_EMIF_CLK_EN (0x0025) +#define U300_SYSCON_SBCER_DMAC_CLK_EN (0x0024) +#define U300_SYSCON_SBCER_CPU_CLK_EN (0x0023) +#define U300_SYSCON_SBCER_APEX_CLK_EN (0x0022) +#define U300_SYSCON_SBCER_AHB_CLK_EN (0x0021) +#define U300_SYSCON_SBCER_AAIF_CLK_EN (0x0020) +/* Single block clock disable 16bit (-/W) */ +#define U300_SYSCON_SBCDR (0x0030) +/* Same values as above for SBCER */ +/* Clock force SLOW peripherals 16bit (R/W) */ +#define U300_SYSCON_CFSR (0x003c) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_CFSR_PPM_CLK_FORCE_EN (0x0200) +#endif +#define U300_SYSCON_CFSR_ACC_TMR_CLK_FORCE_EN (0x0100) +#define U300_SYSCON_CFSR_APP_TMR_CLK_FORCE_EN (0x0080) +#define U300_SYSCON_CFSR_KEYPAD_CLK_FORCE_EN (0x0020) +#define U300_SYSCON_CFSR_GPIO_CLK_FORCE_EN (0x0010) +#define U300_SYSCON_CFSR_EH_CLK_FORCE_EN (0x0008) +#define U300_SYSCON_CFSR_BTR_CLK_FORCE_EN (0x0004) +#define U300_SYSCON_CFSR_UART_CLK_FORCE_EN (0x0002) +#define U300_SYSCON_CFSR_SLOW_BRIDGE_CLK_FORCE_EN (0x0001) +/* Clock force FAST peripherals 16bit (R/W) */ +#define U300_SYSCON_CFFR (0x40) +/* Values not defined. Define if you want to use them. */ +/* Clock force the rest of the peripherals 16bit (R/W) */ +#define U300_SYSCON_CFRR (0x44) +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SYSCON_CFRR_CDS_CLK_FORCE_EN (0x2000) +#define U300_SYSCON_CFRR_ISP_CLK_FORCE_EN (0x1000) +#endif +#define U300_SYSCON_CFRR_MSPRO_CLK_FORCE_EN (0x0800) +#define U300_SYSCON_CFRR_AHB_SUBSYS_BRIDGE_CLK_FORCE_EN (0x0400) +#define U300_SYSCON_CFRR_SEMI_CLK_FORCE_EN (0x0200) +#define U300_SYSCON_CFRR_XGAM_CLK_FORCE_EN (0x0100) +#define U300_SYSCON_CFRR_VIDEO_ENC_CLK_FORCE_EN (0x0080) +#define U300_SYSCON_CFRR_NANDIF_CLK_FORCE_EN (0x0040) +#define U300_SYSCON_CFRR_EMIF_CLK_FORCE_EN (0x0020) +#define U300_SYSCON_CFRR_DMAC_CLK_FORCE_EN (0x0010) +#define U300_SYSCON_CFRR_CPU_CLK_FORCE_EN (0x0008) +#define U300_SYSCON_CFRR_APEX_CLK_FORCE_EN (0x0004) +#define U300_SYSCON_CFRR_AHB_CLK_FORCE_EN (0x0002) +#define U300_SYSCON_CFRR_AAIF_CLK_FORCE_EN (0x0001) +/* PLL208 Frequency Control 16bit (R/W) */ +#define U300_SYSCON_PFCR (0x48) +#define U300_SYSCON_PFCR_DPLL_MULT_NUM (0x000F) +/* Power Management Control 16bit (R/W) */ +#define U300_SYSCON_PMCR (0x50) +#define U300_SYSCON_PMCR_DCON_ENABLE (0x0002) +#define U300_SYSCON_PMCR_PWR_MGNT_ENABLE (0x0001) +/* + * All other clocking registers moved to clock.c! + */ +/* Reset Out 16bit (R/W) */ +#define U300_SYSCON_RCR (0x6c) +#define U300_SYSCON_RCR_RESOUT0_RST_N_DISABLE (0x0001) +/* EMIF Slew Rate Control 16bit (R/W) */ +#define U300_SYSCON_SRCLR (0x70) +#define U300_SYSCON_SRCLR_MASK (0x03FF) +#define U300_SYSCON_SRCLR_VALUE (0x03FF) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_5_B (0x0200) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_5_A (0x0100) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_4_B (0x0080) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_4_A (0x0040) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_3_B (0x0020) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_3_A (0x0010) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_2_B (0x0008) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_2_A (0x0004) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_1_B (0x0002) +#define U300_SYSCON_SRCLR_EMIF_1_SLRC_1_A (0x0001) +/* EMIF Clock Control Register 16bit (R/W) */ +#define U300_SYSCON_ECCR (0x0078) +#define U300_SYSCON_ECCR_MASK (0x000F) +#define U300_SYSCON_ECCR_EMIF_1_STATIC_CLK_EN_N_DISABLE (0x0008) +#define U300_SYSCON_ECCR_EMIF_1_RET_OUT_CLK_EN_N_DISABLE (0x0004) +#define U300_SYSCON_ECCR_EMIF_MEMCLK_RET_EN_N_DISABLE (0x0002) +#define U300_SYSCON_ECCR_EMIF_SDRCLK_RET_EN_N_DISABLE (0x0001) +/* PAD MUX Control register 1 (LOW) 16bit (R/W) */ +#define U300_SYSCON_PMC1LR (0x007C) +#define U300_SYSCON_PMC1LR_MASK (0xFFFF) +#define U300_SYSCON_PMC1LR_CDI_MASK (0xC000) +#define U300_SYSCON_PMC1LR_CDI_CDI (0x0000) +#define U300_SYSCON_PMC1LR_CDI_EMIF (0x4000) +#define U300_SYSCON_PMC1LR_CDI_GPIO (0x8000) +#define U300_SYSCON_PMC1LR_CDI_WCDMA (0xC000) +#define U300_SYSCON_PMC1LR_PDI_MASK (0x3000) +#define U300_SYSCON_PMC1LR_PDI_PDI (0x0000) +#define U300_SYSCON_PMC1LR_PDI_EGG (0x1000) +#define U300_SYSCON_PMC1LR_PDI_WCDMA (0x3000) +#define U300_SYSCON_PMC1LR_MMCSD_MASK (0x0C00) +#define U300_SYSCON_PMC1LR_MMCSD_MMCSD (0x0000) +#define U300_SYSCON_PMC1LR_MMCSD_MSPRO (0x0400) +#define U300_SYSCON_PMC1LR_MMCSD_DSP (0x0800) +#define U300_SYSCON_PMC1LR_MMCSD_WCDMA (0x0C00) +#define U300_SYSCON_PMC1LR_ETM_MASK (0x0300) +#define U300_SYSCON_PMC1LR_ETM_ACC (0x0000) +#define U300_SYSCON_PMC1LR_ETM_APP (0x0100) +#define U300_SYSCON_PMC1LR_EMIF_1_CS2_MASK (0x00C0) +#define U300_SYSCON_PMC1LR_EMIF_1_CS2_STATIC (0x0000) +#define U300_SYSCON_PMC1LR_EMIF_1_CS2_NFIF (0x0040) +#define U300_SYSCON_PMC1LR_EMIF_1_CS2_SDRAM (0x0080) +#define U300_SYSCON_PMC1LR_EMIF_1_CS2_STATIC_2GB (0x00C0) +#define U300_SYSCON_PMC1LR_EMIF_1_CS1_MASK (0x0030) +#define U300_SYSCON_PMC1LR_EMIF_1_CS1_STATIC (0x0000) +#define U300_SYSCON_PMC1LR_EMIF_1_CS1_NFIF (0x0010) +#define U300_SYSCON_PMC1LR_EMIF_1_CS1_SDRAM (0x0020) +#define U300_SYSCON_PMC1LR_EMIF_1_CS1_SEMI (0x0030) +#define U300_SYSCON_PMC1LR_EMIF_1_CS0_MASK (0x000C) +#define U300_SYSCON_PMC1LR_EMIF_1_CS0_STATIC (0x0000) +#define U300_SYSCON_PMC1LR_EMIF_1_CS0_NFIF (0x0004) +#define U300_SYSCON_PMC1LR_EMIF_1_CS0_SDRAM (0x0008) +#define U300_SYSCON_PMC1LR_EMIF_1_CS0_SEMI (0x000C) +#define U300_SYSCON_PMC1LR_EMIF_1_MASK (0x0003) +#define U300_SYSCON_PMC1LR_EMIF_1_STATIC (0x0000) +#define U300_SYSCON_PMC1LR_EMIF_1_SDRAM0 (0x0001) +#define U300_SYSCON_PMC1LR_EMIF_1_SDRAM1 (0x0002) +#define U300_SYSCON_PMC1LR_EMIF_1 (0x0003) +/* PAD MUX Control register 2 (HIGH) 16bit (R/W) */ +#define U300_SYSCON_PMC1HR (0x007E) +#define U300_SYSCON_PMC1HR_MASK (0xFFFF) +#define U300_SYSCON_PMC1HR_MISC_2_MASK (0xC000) +#define U300_SYSCON_PMC1HR_MISC_2_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_MISC_2_MSPRO (0x4000) +#define U300_SYSCON_PMC1HR_MISC_2_DSP (0x8000) +#define U300_SYSCON_PMC1HR_MISC_2_AAIF (0xC000) +#define U300_SYSCON_PMC1HR_APP_GPIO_2_MASK (0x3000) +#define U300_SYSCON_PMC1HR_APP_GPIO_2_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_APP_GPIO_2_NFIF (0x1000) +#define U300_SYSCON_PMC1HR_APP_GPIO_2_DSP (0x2000) +#define U300_SYSCON_PMC1HR_APP_GPIO_2_AAIF (0x3000) +#define U300_SYSCON_PMC1HR_APP_GPIO_1_MASK (0x0C00) +#define U300_SYSCON_PMC1HR_APP_GPIO_1_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_APP_GPIO_1_MMC (0x0400) +#define U300_SYSCON_PMC1HR_APP_GPIO_1_DSP (0x0800) +#define U300_SYSCON_PMC1HR_APP_GPIO_1_AAIF (0x0C00) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK (0x0300) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI (0x0100) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_2_AAIF (0x0300) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK (0x00C0) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI (0x0040) +#define U300_SYSCON_PMC1HR_APP_SPI_CS_1_AAIF (0x00C0) +#define U300_SYSCON_PMC1HR_APP_SPI_2_MASK (0x0030) +#define U300_SYSCON_PMC1HR_APP_SPI_2_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_APP_SPI_2_SPI (0x0010) +#define U300_SYSCON_PMC1HR_APP_SPI_2_DSP (0x0020) +#define U300_SYSCON_PMC1HR_APP_SPI_2_AAIF (0x0030) +#define U300_SYSCON_PMC1HR_APP_UART0_2_MASK (0x000C) +#define U300_SYSCON_PMC1HR_APP_UART0_2_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_APP_UART0_2_UART0 (0x0004) +#define U300_SYSCON_PMC1HR_APP_UART0_2_NFIF_CS (0x0008) +#define U300_SYSCON_PMC1HR_APP_UART0_2_AAIF (0x000C) +#define U300_SYSCON_PMC1HR_APP_UART0_1_MASK (0x0003) +#define U300_SYSCON_PMC1HR_APP_UART0_1_APP_GPIO (0x0000) +#define U300_SYSCON_PMC1HR_APP_UART0_1_UART0 (0x0001) +#define U300_SYSCON_PMC1HR_APP_UART0_1_AAIF (0x0003) +/* Step one for killing the applications system 16bit (-/W) */ +#define U300_SYSCON_KA1R (0x0080) +#define U300_SYSCON_KA1R_MASK (0xFFFF) +#define U300_SYSCON_KA1R_VALUE (0xFFFF) +/* Step two for killing the application system 16bit (-/W) */ +#define U300_SYSCON_KA2R (0x0084) +#define U300_SYSCON_KA2R_MASK (0xFFFF) +#define U300_SYSCON_KA2R_VALUE (0xFFFF) +/* MMC/MSPRO frequency divider register 0 16bit (R/W) */ +#define U300_SYSCON_MMF0R (0x90) +#define U300_SYSCON_MMF0R_MASK (0x00FF) +#define U300_SYSCON_MMF0R_FREQ_0_HIGH_MASK (0x00F0) +#define U300_SYSCON_MMF0R_FREQ_0_LOW_MASK (0x000F) +/* MMC/MSPRO frequency divider register 1 16bit (R/W) */ +#define U300_SYSCON_MMF1R (0x94) +#define U300_SYSCON_MMF1R_MASK (0x00FF) +#define U300_SYSCON_MMF1R_FREQ_1_HIGH_MASK (0x00F0) +#define U300_SYSCON_MMF1R_FREQ_1_LOW_MASK (0x000F) +/* AAIF control register 16 bit (R/W) */ +#define U300_SYSCON_AAIFCR (0x98) +#define U300_SYSCON_AAIFCR_MASK (0x0003) +#define U300_SYSCON_AAIFCR_AASW_CTRL_MASK (0x0003) +#define U300_SYSCON_AAIFCR_AASW_CTRL_FUNCTIONAL (0x0000) +#define U300_SYSCON_AAIFCR_AASW_CTRL_MONITORING (0x0001) +#define U300_SYSCON_AAIFCR_AASW_CTRL_ACC_TO_EXT (0x0002) +#define U300_SYSCON_AAIFCR_AASW_CTRL_APP_TO_EXT (0x0003) +/* Clock control for the MMC and MSPRO blocks 16bit (R/W) */ +#define U300_SYSCON_MMCR (0x9C) +#define U300_SYSCON_MMCR_MASK (0x0003) +#define U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE (0x0002) +#define U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE (0x0001) + +/* TODO: More SYSCON registers missing */ +#define U300_SYSCON_PMC3R (0x10c) +#define U300_SYSCON_PMC3R_APP_MISC_11_MASK (0xc000) +#define U300_SYSCON_PMC3R_APP_MISC_11_SPI (0x4000) +#define U300_SYSCON_PMC3R_APP_MISC_10_MASK (0x3000) +#define U300_SYSCON_PMC3R_APP_MISC_10_SPI (0x1000) +/* TODO: Missing other configs, I just added the SPI stuff */ + +/* SYS_0_CLK_CONTROL first clock control 16bit (R/W) */ +#define U300_SYSCON_S0CCR (0x120) +#define U300_SYSCON_S0CCR_FIELD_MASK (0x43FF) +#define U300_SYSCON_S0CCR_CLOCK_REQ (0x4000) +#define U300_SYSCON_S0CCR_CLOCK_INV (0x0200) +#define U300_SYSCON_S0CCR_CLOCK_FREQ_MASK (0x01E0) +#define U300_SYSCON_S0CCR_CLOCK_SELECT_MASK (0x001E) +#define U300_SYSCON_S0CCR_CLOCK_ENABLE (0x0001) +#define U300_SYSCON_S0CCR_SEL_MCLK (0x8<<1) +#define U300_SYSCON_S0CCR_SEL_ACC_FSM_CLK (0xA<<1) +#define U300_SYSCON_S0CCR_SEL_PLL60_48_CLK (0xC<<1) +#define U300_SYSCON_S0CCR_SEL_PLL60_60_CLK (0xD<<1) +#define U300_SYSCON_S0CCR_SEL_ACC_PLL208_CLK (0xE<<1) +#define U300_SYSCON_S0CCR_SEL_APP_PLL13_CLK (0x0<<1) +#define U300_SYSCON_S0CCR_SEL_APP_FSM_CLK (0x2<<1) +#define U300_SYSCON_S0CCR_SEL_RTC_CLK (0x4<<1) +#define U300_SYSCON_S0CCR_SEL_APP_PLL208_CLK (0x6<<1) +/* SYS_1_CLK_CONTROL second clock control 16 bit (R/W) */ +#define U300_SYSCON_S1CCR (0x124) +#define U300_SYSCON_S1CCR_FIELD_MASK (0x43FF) +#define U300_SYSCON_S1CCR_CLOCK_REQ (0x4000) +#define U300_SYSCON_S1CCR_CLOCK_INV (0x0200) +#define U300_SYSCON_S1CCR_CLOCK_FREQ_MASK (0x01E0) +#define U300_SYSCON_S1CCR_CLOCK_SELECT_MASK (0x001E) +#define U300_SYSCON_S1CCR_CLOCK_ENABLE (0x0001) +#define U300_SYSCON_S1CCR_SEL_MCLK (0x8<<1) +#define U300_SYSCON_S1CCR_SEL_ACC_FSM_CLK (0xA<<1) +#define U300_SYSCON_S1CCR_SEL_PLL60_48_CLK (0xC<<1) +#define U300_SYSCON_S1CCR_SEL_PLL60_60_CLK (0xD<<1) +#define U300_SYSCON_S1CCR_SEL_ACC_PLL208_CLK (0xE<<1) +#define U300_SYSCON_S1CCR_SEL_ACC_PLL13_CLK (0x0<<1) +#define U300_SYSCON_S1CCR_SEL_APP_FSM_CLK (0x2<<1) +#define U300_SYSCON_S1CCR_SEL_RTC_CLK (0x4<<1) +#define U300_SYSCON_S1CCR_SEL_APP_PLL208_CLK (0x6<<1) +/* SYS_2_CLK_CONTROL third clock contol 16 bit (R/W) */ +#define U300_SYSCON_S2CCR (0x128) +#define U300_SYSCON_S2CCR_FIELD_MASK (0xC3FF) +#define U300_SYSCON_S2CCR_CLK_STEAL (0x8000) +#define U300_SYSCON_S2CCR_CLOCK_REQ (0x4000) +#define U300_SYSCON_S2CCR_CLOCK_INV (0x0200) +#define U300_SYSCON_S2CCR_CLOCK_FREQ_MASK (0x01E0) +#define U300_SYSCON_S2CCR_CLOCK_SELECT_MASK (0x001E) +#define U300_SYSCON_S2CCR_CLOCK_ENABLE (0x0001) +#define U300_SYSCON_S2CCR_SEL_MCLK (0x8<<1) +#define U300_SYSCON_S2CCR_SEL_ACC_FSM_CLK (0xA<<1) +#define U300_SYSCON_S2CCR_SEL_PLL60_48_CLK (0xC<<1) +#define U300_SYSCON_S2CCR_SEL_PLL60_60_CLK (0xD<<1) +#define U300_SYSCON_S2CCR_SEL_ACC_PLL208_CLK (0xE<<1) +#define U300_SYSCON_S2CCR_SEL_ACC_PLL13_CLK (0x0<<1) +#define U300_SYSCON_S2CCR_SEL_APP_FSM_CLK (0x2<<1) +#define U300_SYSCON_S2CCR_SEL_RTC_CLK (0x4<<1) +#define U300_SYSCON_S2CCR_SEL_APP_PLL208_CLK (0x6<<1) +/* SYS_MISC_CONTROL, miscellaneous 16bit (R/W) */ +#define U300_SYSCON_MCR (0x12c) +#define U300_SYSCON_MCR_FIELD_MASK (0x00FF) +#define U300_SYSCON_MCR_PMGEN_CR_4_MASK (0x00C0) +#define U300_SYSCON_MCR_PMGEN_CR_4_GPIO (0x0000) +#define U300_SYSCON_MCR_PMGEN_CR_4_SPI (0x0040) +#define U300_SYSCON_MCR_PMGEN_CR_4_AAIF (0x00C0) +#define U300_SYSCON_MCR_PMGEN_CR_2_MASK (0x0030) +#define U300_SYSCON_MCR_PMGEN_CR_2_GPIO (0x0000) +#define U300_SYSCON_MCR_PMGEN_CR_2_EMIF_1_STATIC (0x0010) +#define U300_SYSCON_MCR_PMGEN_CR_2_DSP (0x0020) +#define U300_SYSCON_MCR_PMGEN_CR_2_AAIF (0x0030) +#define U300_SYSCON_MCR_PMGEN_CR_0_MASK (0x000C) +#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_1_SDRAM_M1 (0x0000) +#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_1_SDRAM_M2 (0x0004) +#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_1_SDRAM_M3 (0x0008) +#define U300_SYSCON_MCR_PMGEN_CR_0_EMIF_0_SDRAM (0x000C) +#define U300_SYSCON_MCR_PM1G_MODE_ENABLE (0x0002) +#define U300_SYSCON_MCR_PMTG5_MODE_ENABLE (0x0001) +/* Clock activity observability register 0 */ +#define U300_SYSCON_C0OAR (0x140) +#define U300_SYSCON_C0OAR_MASK (0xFFFF) +#define U300_SYSCON_C0OAR_VALUE (0xFFFF) +#define U300_SYSCON_C0OAR_BT_H_CLK (0x8000) +#define U300_SYSCON_C0OAR_ASPB_P_CLK (0x4000) +#define U300_SYSCON_C0OAR_APP_SEMI_H_CLK (0x2000) +#define U300_SYSCON_C0OAR_APP_SEMI_CLK (0x1000) +#define U300_SYSCON_C0OAR_APP_MMC_MSPRO_CLK (0x0800) +#define U300_SYSCON_C0OAR_APP_I2S1_CLK (0x0400) +#define U300_SYSCON_C0OAR_APP_I2S0_CLK (0x0200) +#define U300_SYSCON_C0OAR_APP_CPU_CLK (0x0100) +#define U300_SYSCON_C0OAR_APP_52_CLK (0x0080) +#define U300_SYSCON_C0OAR_APP_208_CLK (0x0040) +#define U300_SYSCON_C0OAR_APP_104_CLK (0x0020) +#define U300_SYSCON_C0OAR_APEX_CLK (0x0010) +#define U300_SYSCON_C0OAR_AHPB_M_H_CLK (0x0008) +#define U300_SYSCON_C0OAR_AHB_CLK (0x0004) +#define U300_SYSCON_C0OAR_AFPB_P_CLK (0x0002) +#define U300_SYSCON_C0OAR_AAIF_CLK (0x0001) +/* Clock activity observability register 1 */ +#define U300_SYSCON_C1OAR (0x144) +#define U300_SYSCON_C1OAR_MASK (0x3FFE) +#define U300_SYSCON_C1OAR_VALUE (0x3FFE) +#define U300_SYSCON_C1OAR_NFIF_F_CLK (0x2000) +#define U300_SYSCON_C1OAR_MSPRO_CLK (0x1000) +#define U300_SYSCON_C1OAR_MMC_P_CLK (0x0800) +#define U300_SYSCON_C1OAR_MMC_CLK (0x0400) +#define U300_SYSCON_C1OAR_KP_P_CLK (0x0200) +#define U300_SYSCON_C1OAR_I2C1_P_CLK (0x0100) +#define U300_SYSCON_C1OAR_I2C0_P_CLK (0x0080) +#define U300_SYSCON_C1OAR_GPIO_CLK (0x0040) +#define U300_SYSCON_C1OAR_EMIF_MPMC_CLK (0x0020) +#define U300_SYSCON_C1OAR_EMIF_H_CLK (0x0010) +#define U300_SYSCON_C1OAR_EVHIST_CLK (0x0008) +#define U300_SYSCON_C1OAR_PPM_CLK (0x0004) +#define U300_SYSCON_C1OAR_DMA_CLK (0x0002) +/* Clock activity observability register 2 */ +#define U300_SYSCON_C2OAR (0x148) +#define U300_SYSCON_C2OAR_MASK (0x0FFF) +#define U300_SYSCON_C2OAR_VALUE (0x0FFF) +#define U300_SYSCON_C2OAR_XGAM_CDI_CLK (0x0800) +#define U300_SYSCON_C2OAR_XGAM_CLK (0x0400) +#define U300_SYSCON_C2OAR_VC_H_CLK (0x0200) +#define U300_SYSCON_C2OAR_VC_CLK (0x0100) +#define U300_SYSCON_C2OAR_UA_P_CLK (0x0080) +#define U300_SYSCON_C2OAR_TMR1_CLK (0x0040) +#define U300_SYSCON_C2OAR_TMR0_CLK (0x0020) +#define U300_SYSCON_C2OAR_SPI_P_CLK (0x0010) +#define U300_SYSCON_C2OAR_PCM_I2S1_CORE_CLK (0x0008) +#define U300_SYSCON_C2OAR_PCM_I2S1_CLK (0x0004) +#define U300_SYSCON_C2OAR_PCM_I2S0_CORE_CLK (0x0002) +#define U300_SYSCON_C2OAR_PCM_I2S0_CLK (0x0001) + +/* Chip ID register 16bit (R/-) */ +#define U300_SYSCON_CIDR (0x400) +/* Video IRQ clear 16bit (R/W) */ +#define U300_SYSCON_VICR (0x404) +#define U300_SYSCON_VICR_VIDEO1_IRQ_CLEAR_ENABLE (0x0002) +#define U300_SYSCON_VICR_VIDEO0_IRQ_CLEAR_ENABLE (0x0001) +/* SMCR */ +#define U300_SYSCON_SMCR (0x4d0) +#define U300_SYSCON_SMCR_FIELD_MASK (0x000e) +#define U300_SYSCON_SMCR_SEMI_SREFACK_IND (0x0008) +#define U300_SYSCON_SMCR_SEMI_SREFREQ_ENABLE (0x0004) +#define U300_SYSCON_SMCR_SEMI_EXT_BOOT_MODE_ENABLE (0x0002) +/* CPU_SW_DBGEN Software Debug Enable 16bit (R/W) */ +#define U300_SYSCON_CSDR (0x4f0) +#define U300_SYSCON_CSDR_SW_DEBUG_ENABLE (0x0001) +/* PRINT_CONTROL Print Control 16bit (R/-) */ +#define U300_SYSCON_PCR (0x4f8) +#define U300_SYSCON_PCR_SERV_IND (0x0001) +/* BOOT_CONTROL 16bit (R/-) */ +#define U300_SYSCON_BCR (0x4fc) +#define U300_SYSCON_BCR_ACC_CPU_SUBSYS_VINITHI_IND (0x0400) +#define U300_SYSCON_BCR_APP_CPU_SUBSYS_VINITHI_IND (0x0200) +#define U300_SYSCON_BCR_EXTRA_BOOT_OPTION_MASK (0x01FC) +#define U300_SYSCON_BCR_APP_BOOT_SERV_MASK (0x0003) + + +/* CPU clock defines */ +/** + * CPU high frequency in MHz + */ +#define SYSCON_CPU_CLOCK_HIGH 208 +/** + * CPU medium frequency in MHz + */ +#define SYSCON_CPU_CLOCK_MEDIUM 104 +/** + * CPU low frequency in MHz + */ +#define SYSCON_CPU_CLOCK_LOW 13 + +/* EMIF clock defines */ +/** + * EMIF high frequency in MHz + */ +#define SYSCON_EMIF_CLOCK_HIGH 104 +/** + * EMIF medium frequency in MHz + */ +#define SYSCON_EMIF_CLOCK_MEDIUM 104 +/** + * EMIF low frequency in MHz + */ +#define SYSCON_EMIF_CLOCK_LOW 13 + +/* AHB clock defines */ +/** + * AHB high frequency in MHz + */ +#define SYSCON_AHB_CLOCK_HIGH 52 +/** + * AHB medium frequency in MHz + */ +#define SYSCON_AHB_CLOCK_MEDIUM 52 +/** + * AHB low frequency in MHz + */ +#define SYSCON_AHB_CLOCK_LOW 7 /* i.e 13/2=6.5MHz */ + +enum syscon_busmaster { + SYSCON_BM_DMAC, + SYSCON_BM_XGAM, + SYSCON_BM_VIDEO_ENC +}; + +/* + * Note that this array must match the order of the array "clk_reg" + * in syscon.c + */ +enum syscon_clk { + SYSCON_CLKCONTROL_SLOW_BRIDGE, + SYSCON_CLKCONTROL_UART, + SYSCON_CLKCONTROL_BTR, + SYSCON_CLKCONTROL_EH, + SYSCON_CLKCONTROL_GPIO, + SYSCON_CLKCONTROL_KEYPAD, + SYSCON_CLKCONTROL_APP_TIMER, + SYSCON_CLKCONTROL_ACC_TIMER, + SYSCON_CLKCONTROL_FAST_BRIDGE, + SYSCON_CLKCONTROL_I2C0, + SYSCON_CLKCONTROL_I2C1, + SYSCON_CLKCONTROL_I2S0, + SYSCON_CLKCONTROL_I2S1, + SYSCON_CLKCONTROL_MMC, + SYSCON_CLKCONTROL_SPI, + SYSCON_CLKCONTROL_I2S0_CORE, + SYSCON_CLKCONTROL_I2S1_CORE, + SYSCON_CLKCONTROL_AAIF, + SYSCON_CLKCONTROL_AHB, + SYSCON_CLKCONTROL_APEX, + SYSCON_CLKCONTROL_CPU, + SYSCON_CLKCONTROL_DMA, + SYSCON_CLKCONTROL_EMIF, + SYSCON_CLKCONTROL_NAND_IF, + SYSCON_CLKCONTROL_VIDEO_ENC, + SYSCON_CLKCONTROL_XGAM, + SYSCON_CLKCONTROL_SEMI, + SYSCON_CLKCONTROL_AHB_SUBSYS, + SYSCON_CLKCONTROL_MSPRO +}; + +enum syscon_sysclk_mode { + SYSCON_SYSCLK_DISABLED, + SYSCON_SYSCLK_M_CLK, + SYSCON_SYSCLK_ACC_FSM, + SYSCON_SYSCLK_PLL60_48, + SYSCON_SYSCLK_PLL60_60, + SYSCON_SYSCLK_ACC_PLL208, + SYSCON_SYSCLK_APP_PLL13, + SYSCON_SYSCLK_APP_FSM, + SYSCON_SYSCLK_RTC, + SYSCON_SYSCLK_APP_PLL208 +}; + +enum syscon_sysclk_req { + SYSCON_SYSCLKREQ_DISABLED, + SYSCON_SYSCLKREQ_ACTIVE_LOW +}; + +enum syscon_clk_mode { + SYSCON_CLKMODE_OFF, + SYSCON_CLKMODE_DEFAULT, + SYSCON_CLKMODE_LOW, + SYSCON_CLKMODE_MEDIUM, + SYSCON_CLKMODE_HIGH, + SYSCON_CLKMODE_PERMANENT, + SYSCON_CLKMODE_ON, +}; + +enum syscon_call_mode { + SYSCON_CLKCALL_NOWAIT, + SYSCON_CLKCALL_WAIT, +}; + +int syscon_dc_on(bool keep_power_on); +int syscon_set_busmaster_active_state(enum syscon_busmaster busmaster, + bool active); +bool syscon_get_busmaster_active_state(void); +int syscon_set_sleep_mask(enum syscon_clk, + bool sleep_ctrl); +int syscon_config_sysclk(u32 sysclk, + enum syscon_sysclk_mode sysclkmode, + bool inverse, + u32 divisor, + enum syscon_sysclk_req sysclkreq); +bool syscon_can_turn_off_semi_clock(void); + +/* This function is restricted to core.c */ +int syscon_request_normal_power(bool req); + +/* This function is restricted to be used by platform_speed.c */ +int syscon_speed_request(enum syscon_call_mode wait_mode, + enum syscon_clk_mode req_clk_mode); +#endif /* __MACH_SYSCON_H */ diff --git a/arch/arm/mach-u300/include/mach/u300-regs.h b/arch/arm/mach-u300/include/mach/u300-regs.h new file mode 100644 index 0000000..88333df --- /dev/null +++ b/arch/arm/mach-u300/include/mach/u300-regs.h @@ -0,0 +1,187 @@ +/* + * + * arch/arm/mach-u300/include/mach/u300-regs.h + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Basic register address definitions in physical memory and + * some block defintions for core devices like the timer. + * Author: Linus Walleij + */ + +#ifndef __MACH_U300_REGS_H +#define __MACH_U300_REGS_H + +/* + * These are the large blocks of memory allocated for I/O. + * the defines are used for setting up the I/O memory mapping. + */ + +/* NAND Flash CS0 */ +#define U300_NAND_CS0_PHYS_BASE 0x80000000 +#define U300_NAND_CS0_VIRT_BASE 0xff040000 + +/* NFIF */ +#define U300_NAND_IF_PHYS_BASE 0x9f800000 +#define U300_NAND_IF_VIRT_BASE 0xff030000 + +/* AHB Peripherals */ +#define U300_AHB_PER_PHYS_BASE 0xa0000000 +#define U300_AHB_PER_VIRT_BASE 0xff010000 + +/* FAST Peripherals */ +#define U300_FAST_PER_PHYS_BASE 0xc0000000 +#define U300_FAST_PER_VIRT_BASE 0xff020000 + +/* SLOW Peripherals */ +#define U300_SLOW_PER_PHYS_BASE 0xc0010000 +#define U300_SLOW_PER_VIRT_BASE 0xff000000 + +/* Boot ROM */ +#define U300_BOOTROM_PHYS_BASE 0xffff0000 +#define U300_BOOTROM_VIRT_BASE 0xffff0000 + +/* SEMI config base */ +#ifdef CONFIG_MACH_U300_BS335 +#define U300_SEMI_CONFIG_BASE 0x2FFE0000 +#else +#define U300_SEMI_CONFIG_BASE 0x30000000 +#endif + +/* + * All the following peripherals are specified at their PHYSICAL address, + * so if you need to access them (in the kernel), you MUST use the macros + * defined in to map to the IO_ADDRESS_AHB() IO_ADDRESS_FAST() + * etc. + */ + +/* + * AHB peripherals + */ + +/* AHB Peripherals Bridge Controller */ +#define U300_AHB_BRIDGE_BASE (U300_AHB_PER_PHYS_BASE+0x0000) + +/* Vectored Interrupt Controller 0, servicing 32 interrupts */ +#define U300_INTCON0_BASE (U300_AHB_PER_PHYS_BASE+0x1000) +#define U300_INTCON0_VBASE (U300_AHB_PER_VIRT_BASE+0x1000) + +/* Vectored Interrupt Controller 1, servicing 32 interrupts */ +#define U300_INTCON1_BASE (U300_AHB_PER_PHYS_BASE+0x2000) +#define U300_INTCON1_VBASE (U300_AHB_PER_VIRT_BASE+0x2000) + +/* Memory Stick Pro (MSPRO) controller */ +#define U300_MSPRO_BASE (U300_AHB_PER_PHYS_BASE+0x3000) + +/* EMIF Configuration Area */ +#define U300_EMIF_CFG_BASE (U300_AHB_PER_PHYS_BASE+0x4000) + + +/* + * FAST peripherals + */ + +/* FAST bridge control */ +#define U300_FAST_BRIDGE_BASE (U300_FAST_PER_PHYS_BASE+0x0000) + +/* MMC/SD controller */ +#define U300_MMCSD_BASE (U300_FAST_PER_PHYS_BASE+0x1000) + +/* PCM I2S0 controller */ +#define U300_PCM_I2S0_BASE (U300_FAST_PER_PHYS_BASE+0x2000) + +/* PCM I2S1 controller */ +#define U300_PCM_I2S1_BASE (U300_FAST_PER_PHYS_BASE+0x3000) + +/* I2C0 controller */ +#define U300_I2C0_BASE (U300_FAST_PER_PHYS_BASE+0x4000) + +/* I2C1 controller */ +#define U300_I2C1_BASE (U300_FAST_PER_PHYS_BASE+0x5000) + +/* SPI controller */ +#define U300_SPI_BASE (U300_FAST_PER_PHYS_BASE+0x6000) + +#ifdef CONFIG_MACH_U300_BS335 +/* Fast UART1 on U335 only */ +#define U300_UART1_BASE (U300_SLOW_PER_PHYS_BASE+0x7000) +#endif + +/* + * SLOW peripherals + */ + +/* SLOW bridge control */ +#define U300_SLOW_BRIDGE_BASE (U300_SLOW_PER_PHYS_BASE) + +/* SYSCON */ +#define U300_SYSCON_BASE (U300_SLOW_PER_PHYS_BASE+0x1000) +#define U300_SYSCON_VBASE (U300_SLOW_PER_VIRT_BASE+0x1000) + +/* Watchdog */ +#define U300_WDOG_BASE (U300_SLOW_PER_PHYS_BASE+0x2000) + +/* UART0 */ +#define U300_UART0_BASE (U300_SLOW_PER_PHYS_BASE+0x3000) + +/* APP side special timer */ +#define U300_TIMER_APP_BASE (U300_SLOW_PER_PHYS_BASE+0x4000) +#define U300_TIMER_APP_VBASE (U300_SLOW_PER_VIRT_BASE+0x4000) + +/* Keypad */ +#define U300_KEYPAD_BASE (U300_SLOW_PER_PHYS_BASE+0x5000) + +/* GPIO */ +#define U300_GPIO_BASE (U300_SLOW_PER_PHYS_BASE+0x6000) + +/* RTC */ +#define U300_RTC_BASE (U300_SLOW_PER_PHYS_BASE+0x7000) + +/* Bus tracer */ +#define U300_BUSTR_BASE (U300_SLOW_PER_PHYS_BASE+0x8000) + +/* Event handler (hardware queue) */ +#define U300_EVHIST_BASE (U300_SLOW_PER_PHYS_BASE+0x9000) + +/* Genric Timer */ +#define U300_TIMER_BASE (U300_SLOW_PER_PHYS_BASE+0xa000) + +/* PPM */ +#define U300_PPM_BASE (U300_SLOW_PER_PHYS_BASE+0xb000) + + +/* + * REST peripherals + */ + +/* ISP (image signal processor) is only available in U335 */ +#ifdef CONFIG_MACH_U300_BS335 +#define U300_ISP_BASE (0xA0008000) +#endif + +/* DMA Controller base */ +#define U300_DMAC_BASE (0xC0020000) + +/* MSL Base */ +#define U300_MSL_BASE (0xc0022000) + +/* APEX Base */ +#define U300_APEX_BASE (0xc0030000) + +/* Video Encoder Base */ +#ifdef CONFIG_MACH_U300_BS335 +#define U300_VIDEOENC_BASE (0xc0080000) +#else +#define U300_VIDEOENC_BASE (0xc0040000) +#endif + +/* XGAM Base */ +#define U300_XGAM_BASE (0xd0000000) + +/* + * Virtual accessor macros for static devices + */ + + +#endif -- cgit v0.10.2 From bd41b99d4661e775ff152f2842782c43dbb30a59 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 23 Apr 2009 21:15:04 +0100 Subject: [ARM] 5471/2: U300 GPIO and PADMUX support This adds GPIO and PADMUX headers and implementation for the U300 platform. This is an implementation in isolation that depend on later patches in this series to plug into the framework. Signed-off-by: Linus Walleij Signed-off-by: Russell King diff --git a/arch/arm/mach-u300/gpio.c b/arch/arm/mach-u300/gpio.c new file mode 100644 index 0000000..2d5ae8e --- /dev/null +++ b/arch/arm/mach-u300/gpio.c @@ -0,0 +1,701 @@ +/* + * + * arch/arm/mach-u300/gpio.c + * + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * U300 GPIO module. + * This can driver either of the two basic GPIO cores + * available in the U300 platforms: + * COH 901 335 - Used in DB3150 (U300 1.0) and DB3200 (U330 1.0) + * COH 901 571/3 - Used in DB3210 (U365 2.0) and DB3350 (U335 1.0) + * Notice that you also have inline macros in + * Author: Linus Walleij + * Author: Jonas Aaberg + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Need access to SYSCON registers for PADmuxing */ +#include + +#include "padmux.h" + +/* Reference to GPIO block clock */ +static struct clk *clk; + +/* Memory resource */ +static struct resource *memres; +static void __iomem *virtbase; + +struct u300_gpio_port { + const char *name; + int irq; + int number; +}; + + +static struct u300_gpio_port gpio_ports[] = { + { + .name = "gpio0", + .number = 0, + }, + { + .name = "gpio1", + .number = 1, + }, + { + .name = "gpio2", + .number = 2, + }, +#ifdef U300_COH901571_3 + { + .name = "gpio3", + .number = 3, + }, + { + .name = "gpio4", + .number = 4, + }, +#ifdef CONFIG_MACH_U300_BS335 + { + .name = "gpio5", + .number = 5, + }, + { + .name = "gpio6", + .number = 6, + }, +#endif +#endif + +}; + + +#ifdef U300_COH901571_3 + +/* Default input value */ +#define DEFAULT_OUTPUT_LOW 0 +#define DEFAULT_OUTPUT_HIGH 1 + +/* GPIO Pull-Up status */ +#define DISABLE_PULL_UP 0 +#define ENABLE_PULL_UP 1 + +#define GPIO_NOT_USED 0 +#define GPIO_IN 1 +#define GPIO_OUT 2 + +struct u300_gpio_configuration_data { + unsigned char pin_usage; + unsigned char default_output_value; + unsigned char pull_up; +}; + +/* Initial configuration */ +const struct u300_gpio_configuration_data +u300_gpio_config[U300_GPIO_NUM_PORTS][U300_GPIO_PINS_PER_PORT] = { +#ifdef CONFIG_MACH_U300_BS335 + /* Port 0, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_HIGH, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + }, + /* Port 1, pins 0-7 */ + { + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_HIGH, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + }, + /* Port 2, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP} + }, + /* Port 3, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + }, + /* Port 4, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + }, + /* Port 5, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + }, + /* Port 6, pind 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + } +#endif + +#ifdef CONFIG_MACH_U300_BS365 + /* Port 0, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + }, + /* Port 1, pins 0-7 */ + { + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_HIGH, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP} + }, + /* Port 2, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, DISABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP} + }, + /* Port 3, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP} + }, + /* Port 4, pins 0-7 */ + { + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_IN, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + /* These 4 pins doesn't exist on DB3210 */ + {GPIO_OUT, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP}, + {GPIO_OUT, DEFAULT_OUTPUT_LOW, ENABLE_PULL_UP} + } +#endif +}; +#endif + + +/* No users == we can power down GPIO */ +static int gpio_users; + +struct gpio_struct { + int (*callback)(void *); + void *data; + int users; +}; + +static struct gpio_struct gpio_pin[U300_GPIO_MAX]; + +/* + * Let drivers register callback in order to get notified when there is + * an interrupt on the gpio pin + */ +int gpio_register_callback(unsigned gpio, int (*func)(void *arg), void *data) +{ + if (gpio_pin[gpio].callback) + printk(KERN_WARNING "GPIO: %s: WARNING: callback already " \ + "registered for gpio pin#%d\n", __func__, gpio); + gpio_pin[gpio].callback = func; + gpio_pin[gpio].data = data; + + return 0; +} +EXPORT_SYMBOL(gpio_register_callback); + +int gpio_unregister_callback(unsigned gpio) +{ + if (!gpio_pin[gpio].callback) + printk(KERN_WARNING "GPIO: %s: WARNING: callback already " \ + "unregistered for gpio pin#%d\n", __func__, gpio); + gpio_pin[gpio].callback = NULL; + gpio_pin[gpio].data = NULL; + + return 0; +} +EXPORT_SYMBOL(gpio_unregister_callback); + +int gpio_request(unsigned gpio, const char *label) +{ + if (gpio_pin[gpio].users) + return -EINVAL; + else + gpio_pin[gpio].users++; + + gpio_users++; + + return 0; +} +EXPORT_SYMBOL(gpio_request); + +void gpio_free(unsigned gpio) +{ + gpio_users--; + gpio_pin[gpio].users--; + if (unlikely(gpio_pin[gpio].users < 0)) { + printk(KERN_WARNING "GPIO: Warning: gpio#%d release mismatch\n", + gpio); + gpio_pin[gpio].users = 0; + } + + return; +} +EXPORT_SYMBOL(gpio_free); + +/* This returns zero or nonzero */ +int gpio_get_value(unsigned gpio) +{ + return readl(virtbase + U300_GPIO_PXPDIR + + PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING) & (1 << (gpio & 0x07)); +} +EXPORT_SYMBOL(gpio_get_value); + +/* + * We hope that the compiler will optimize away the unused branch + * in case "value" is a constant + */ +void gpio_set_value(unsigned gpio, int value) +{ + u32 val; + unsigned long flags; + + local_irq_save(flags); + if (value) { + /* set */ + val = readl(virtbase + U300_GPIO_PXPDOR + + PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING) + & (1 << (gpio & 0x07)); + writel(val | (1 << (gpio & 0x07)), virtbase + + U300_GPIO_PXPDOR + + PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING); + } else { + /* clear */ + val = readl(virtbase + U300_GPIO_PXPDOR + + PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING) + & (1 << (gpio & 0x07)); + writel(val & ~(1 << (gpio & 0x07)), virtbase + + U300_GPIO_PXPDOR + + PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING); + } + local_irq_restore(flags); +} +EXPORT_SYMBOL(gpio_set_value); + +int gpio_direction_input(unsigned gpio) +{ + unsigned long flags; + u32 val; + + if (gpio > U300_GPIO_MAX) + return -EINVAL; + + local_irq_save(flags); + val = readl(virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + /* Mask out this pin*/ + val &= ~(U300_GPIO_PXPCR_PIN_MODE_MASK << ((gpio & 0x07) << 1)); + /* This is not needed since it sets the bits to zero.*/ + /* val |= (U300_GPIO_PXPCR_PIN_MODE_INPUT << (gpio*2)); */ + writel(val, virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + local_irq_restore(flags); + return 0; +} +EXPORT_SYMBOL(gpio_direction_input); + +int gpio_direction_output(unsigned gpio, int value) +{ + unsigned long flags; + u32 val; + + if (gpio > U300_GPIO_MAX) + return -EINVAL; + + local_irq_save(flags); + val = readl(virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + /* Mask out this pin */ + val &= ~(U300_GPIO_PXPCR_PIN_MODE_MASK << ((gpio & 0x07) << 1)); + /* + * FIXME: configure for push/pull, open drain or open source per pin + * in setup. The current driver will only support push/pull. + */ + val |= (U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL + << ((gpio & 0x07) << 1)); + writel(val, virtbase + U300_GPIO_PXPCR + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + gpio_set_value(gpio, value); + local_irq_restore(flags); + return 0; +} +EXPORT_SYMBOL(gpio_direction_output); + +/* + * Enable an IRQ, edge is rising edge (!= 0) or falling edge (==0). + */ +void enable_irq_on_gpio_pin(unsigned gpio, int edge) +{ + u32 val; + unsigned long flags; + local_irq_save(flags); + + val = readl(virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + val |= (1 << (gpio & 0x07)); + writel(val, virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + val = readl(virtbase + U300_GPIO_PXICR + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + if (edge) + val |= (1 << (gpio & 0x07)); + else + val &= ~(1 << (gpio & 0x07)); + writel(val, virtbase + U300_GPIO_PXICR + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + local_irq_restore(flags); +} +EXPORT_SYMBOL(enable_irq_on_gpio_pin); + +void disable_irq_on_gpio_pin(unsigned gpio) +{ + u32 val; + unsigned long flags; + + local_irq_save(flags); + val = readl(virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + val &= ~(1 << (gpio & 0x07)); + writel(val, virtbase + U300_GPIO_PXIEN + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + local_irq_restore(flags); +} +EXPORT_SYMBOL(disable_irq_on_gpio_pin); + +/* Enable (value == 0) or disable (value == 1) internal pullup */ +void gpio_pullup(unsigned gpio, int value) +{ + u32 val; + unsigned long flags; + + local_irq_save(flags); + if (value) { + val = readl(virtbase + U300_GPIO_PXPER + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + writel(val | (1 << (gpio & 0x07)), virtbase + U300_GPIO_PXPER + + PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING); + } else { + val = readl(virtbase + U300_GPIO_PXPER + PIN_TO_PORT(gpio) * + U300_GPIO_PORTX_SPACING); + writel(val & ~(1 << (gpio & 0x07)), virtbase + U300_GPIO_PXPER + + PIN_TO_PORT(gpio) * U300_GPIO_PORTX_SPACING); + } + local_irq_restore(flags); +} +EXPORT_SYMBOL(gpio_pullup); + +static irqreturn_t gpio_irq_handler(int irq, void *dev_id) +{ + struct u300_gpio_port *port = dev_id; + u32 val; + int pin; + + /* Read event register */ + val = readl(virtbase + U300_GPIO_PXIEV + port->number * + U300_GPIO_PORTX_SPACING); + /* Mask with enable register */ + val &= readl(virtbase + U300_GPIO_PXIEV + port->number * + U300_GPIO_PORTX_SPACING); + /* Mask relevant bits */ + val &= U300_GPIO_PXIEV_ALL_IRQ_EVENT_MASK; + /* ACK IRQ (clear event) */ + writel(val, virtbase + U300_GPIO_PXIEV + port->number * + U300_GPIO_PORTX_SPACING); + /* Print message */ + while (val != 0) { + unsigned gpio; + + pin = __ffs(val); + /* mask off this pin */ + val &= ~(1 << pin); + gpio = (port->number << 3) + pin; + + if (gpio_pin[gpio].callback) + (void)gpio_pin[gpio].callback(gpio_pin[gpio].data); + else + printk(KERN_DEBUG "GPIO: Stray GPIO IRQ on line %d\n", + gpio); + } + return IRQ_HANDLED; +} + +static void gpio_set_initial_values(void) +{ +#ifdef U300_COH901571_3 + int i, j; + unsigned long flags; + u32 val; + + /* Write default values to all pins */ + for (i = 0; i < U300_GPIO_NUM_PORTS; i++) { + val = 0; + for (j = 0; j < 8; j++) + val |= (u32) (u300_gpio_config[i][j].default_output_value != DEFAULT_OUTPUT_LOW) << j; + local_irq_save(flags); + writel(val, virtbase + U300_GPIO_PXPDOR + i * U300_GPIO_PORTX_SPACING); + local_irq_restore(flags); + } + + /* + * Put all pins that are set to either 'GPIO_OUT' or 'GPIO_NOT_USED' + * to output and 'GPIO_IN' to input for each port. And initalize + * default value on outputs. + */ + for (i = 0; i < U300_GPIO_NUM_PORTS; i++) { + for (j = 0; j < U300_GPIO_PINS_PER_PORT; j++) { + local_irq_save(flags); + val = readl(virtbase + U300_GPIO_PXPCR + + i * U300_GPIO_PORTX_SPACING); + /* Mask out this pin */ + val &= ~(U300_GPIO_PXPCR_PIN_MODE_MASK << (j << 1)); + + if (u300_gpio_config[i][j].pin_usage != GPIO_IN) + val |= (U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL << (j << 1)); + writel(val, virtbase + U300_GPIO_PXPCR + + i * U300_GPIO_PORTX_SPACING); + local_irq_restore(flags); + } + } + + /* Enable or disable the internal pull-ups in the GPIO ASIC block */ + for (i = 0; i < U300_GPIO_MAX; i++) { + val = 0; + for (j = 0; j < 8; j++) + val |= (u32)((u300_gpio_config[i][j].pull_up == DISABLE_PULL_UP)) << j; + local_irq_save(flags); + writel(val, virtbase + U300_GPIO_PXPER + i * U300_GPIO_PORTX_SPACING); + local_irq_restore(flags); + } +#endif +} + +static int __devinit gpio_probe(struct platform_device *pdev) +{ + u32 val; + int err = 0; + int i; + int num_irqs; + + memset(gpio_pin, 0, sizeof(gpio_pin)); + + /* Get GPIO clock */ + clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(clk)) { + err = PTR_ERR(clk); + printk(KERN_ERR "GPIO: could not get GPIO clock\n"); + goto err_no_clk; + } + err = clk_enable(clk); + if (err) { + printk(KERN_ERR "GPIO: could not enable GPIO clock\n"); + goto err_no_clk_enable; + } + + memres = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!memres) + goto err_no_resource; + + if (request_mem_region(memres->start, memres->end - memres->start, "GPIO Controller") + == NULL) { + err = -ENODEV; + goto err_no_ioregion; + } + + virtbase = ioremap(memres->start, memres->end - memres->start + 1); + if (!virtbase) { + err = -ENOMEM; + goto err_no_ioremap; + } + +#ifdef U300_COH901335 + printk(KERN_INFO "GPIO: Initializing GPIO Controller COH 901 335\n"); + /* Turn on the GPIO block */ + writel(U300_GPIO_CR_BLOCK_CLOCK_ENABLE, virtbase + U300_GPIO_CR); +#endif + +#ifdef U300_COH901571_3 + printk(KERN_INFO "GPIO: Initializing GPIO Controller COH 901 571/3\n"); + val = readl(virtbase + U300_GPIO_CR); + printk(KERN_INFO "GPIO: COH901571/3 block version: %d, " \ + "number of cores: %d\n", + ((val & 0x0000FE00) >> 9), + ((val & 0x000001FC) >> 2)); + writel(U300_GPIO_CR_BLOCK_CLKRQ_ENABLE, virtbase + U300_GPIO_CR); +#endif + + /* Set up some padmuxing here */ +#ifdef CONFIG_MMC + pmx_set_mission_mode_mmc(); +#endif +#ifdef CONFIG_SPI_PL022 + pmx_set_mission_mode_spi(); +#endif + + gpio_set_initial_values(); + + for (num_irqs = 0 ; num_irqs < U300_GPIO_NUM_PORTS; num_irqs++) { + + gpio_ports[num_irqs].irq = + platform_get_irq_byname(pdev, + gpio_ports[num_irqs].name); + + err = request_irq(gpio_ports[num_irqs].irq, + gpio_irq_handler, IRQF_DISABLED, + gpio_ports[num_irqs].name, + &gpio_ports[num_irqs]); + if (err) { + printk(KERN_CRIT "GPIO: Cannot allocate IRQ for %s!\n", + gpio_ports[num_irqs].name); + goto err_no_irq; + } + /* Turns off PortX_irq_force */ + writel(0x0, virtbase + U300_GPIO_PXIFR + + num_irqs * U300_GPIO_PORTX_SPACING); + } + printk(KERN_INFO "GPIO: U300 gpio module loaded\n"); + + return 0; + + err_no_irq: + for (i = 0; i < num_irqs; i++) + free_irq(gpio_ports[i].irq, &gpio_ports[i]); + iounmap(virtbase); + err_no_ioremap: + release_mem_region(memres->start, memres->end - memres->start); + err_no_ioregion: + err_no_resource: + clk_disable(clk); + err_no_clk_enable: + clk_put(clk); + err_no_clk: + printk(KERN_INFO "GPIO: module ERROR:%d\n", err); + return err; +} + +static int __devexit gpio_remove(struct platform_device *pdev) +{ + int i; + + /* Turn off the GPIO block */ + writel(0x00000000U, virtbase + U300_GPIO_CR); + for (i = 0 ; i < U300_GPIO_NUM_PORTS; i++) + free_irq(gpio_ports[i].irq, &gpio_ports[i]); + iounmap(virtbase); + release_mem_region(memres->start, memres->end - memres->start); + clk_disable(clk); + clk_put(clk); + return 0; +} + +static struct platform_driver gpio_driver = { + .driver = { + .name = "u300-gpio", + }, + .probe = gpio_probe, + .remove = __devexit_p(gpio_remove), +}; + + +static int __init u300_gpio_init(void) +{ + return platform_driver_register(&gpio_driver); +} + +static void __exit u300_gpio_exit(void) +{ + platform_driver_unregister(&gpio_driver); +} + +arch_initcall(u300_gpio_init); +module_exit(u300_gpio_exit); + +MODULE_AUTHOR("Linus Walleij "); + +#ifdef U300_COH901571_3 +MODULE_DESCRIPTION("ST-Ericsson AB COH 901 571/3 GPIO driver"); +#endif + +#ifdef U300_COH901335 +MODULE_DESCRIPTION("ST-Ericsson AB COH 901 335 GPIO driver"); +#endif + +MODULE_LICENSE("GPL"); diff --git a/arch/arm/mach-u300/include/mach/gpio.h b/arch/arm/mach-u300/include/mach/gpio.h new file mode 100644 index 0000000..c817412 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/gpio.h @@ -0,0 +1,290 @@ +/* + * + * arch/arm/mach-u300/include/mach/gpio.h + * + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * GPIO block resgister definitions and inline macros for + * U300 GPIO COH 901 335 or COH 901 571/3 + * Author: Linus Walleij + */ + +#ifndef __MACH_U300_GPIO_H +#define __MACH_U300_GPIO_H + +#include +#include +#include +#include + +/* Switch type depending on platform/chip variant */ +#if defined(CONFIG_MACH_U300_BS2X) || defined(CONFIG_MACH_U300_BS330) +#define U300_COH901335 +#endif +#if defined(CONFIG_MACH_U300_BS365) || defined(CONFIG_MACH_U300_BS335) +#define U300_COH901571_3 +#endif + +/* Get base address for regs here */ +#include "u300-regs.h" +/* IRQ numbers */ +#include "irqs.h" + +/* + * This is the GPIO block definitions. GPIO (General Purpose I/O) can be + * used for anything, and often is. The event/enable etc figures are for + * the lowermost pin (pin 0 on each port), shift this left to match your + * pin if you're gonna use these values. + */ +#ifdef U300_COH901335 +#define U300_GPIO_PORTX_SPACING (0x1C) +/* Port X Pin Data Register 32bit, this is both input and output (R/W) */ +#define U300_GPIO_PXPDIR (0x00) +#define U300_GPIO_PXPDOR (0x00) +/* Port X Pin Config Register 32bit (R/W) */ +#define U300_GPIO_PXPCR (0x04) +#define U300_GPIO_PXPCR_ALL_PINS_MODE_MASK (0x0000FFFFUL) +#define U300_GPIO_PXPCR_PIN_MODE_MASK (0x00000003UL) +#define U300_GPIO_PXPCR_PIN_MODE_SHIFT (0x00000002UL) +#define U300_GPIO_PXPCR_PIN_MODE_INPUT (0x00000000UL) +#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL (0x00000001UL) +#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_DRAIN (0x00000002UL) +#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_SOURCE (0x00000003UL) +/* Port X Interrupt Event Register 32bit (R/W) */ +#define U300_GPIO_PXIEV (0x08) +#define U300_GPIO_PXIEV_ALL_IRQ_EVENT_MASK (0x000000FFUL) +#define U300_GPIO_PXIEV_IRQ_EVENT (0x00000001UL) +/* Port X Interrupt Enable Register 32bit (R/W) */ +#define U300_GPIO_PXIEN (0x0C) +#define U300_GPIO_PXIEN_ALL_IRQ_ENABLE_MASK (0x000000FFUL) +#define U300_GPIO_PXIEN_IRQ_ENABLE (0x00000001UL) +/* Port X Interrupt Force Register 32bit (R/W) */ +#define U300_GPIO_PXIFR (0x10) +#define U300_GPIO_PXIFR_ALL_IRQ_FORCE_MASK (0x000000FFUL) +#define U300_GPIO_PXIFR_IRQ_FORCE (0x00000001UL) +/* Port X Interrupt Config Register 32bit (R/W) */ +#define U300_GPIO_PXICR (0x14) +#define U300_GPIO_PXICR_ALL_IRQ_CONFIG_MASK (0x000000FFUL) +#define U300_GPIO_PXICR_IRQ_CONFIG_MASK (0x00000001UL) +#define U300_GPIO_PXICR_IRQ_CONFIG_FALLING_EDGE (0x00000000UL) +#define U300_GPIO_PXICR_IRQ_CONFIG_RISING_EDGE (0x00000001UL) +/* Port X Pull-up Enable Register 32bit (R/W) */ +#define U300_GPIO_PXPER (0x18) +#define U300_GPIO_PXPER_ALL_PULL_UP_DISABLE_MASK (0x000000FFUL) +#define U300_GPIO_PXPER_PULL_UP_DISABLE (0x00000001UL) +/* Control Register 32bit (R/W) */ +#define U300_GPIO_CR (0x54) +#define U300_GPIO_CR_BLOCK_CLOCK_ENABLE (0x00000001UL) +/* three ports of 8 bits each = GPIO pins 0..23 */ +#define U300_GPIO_NUM_PORTS 3 +#define U300_GPIO_PINS_PER_PORT 8 +#define U300_GPIO_MAX (U300_GPIO_PINS_PER_PORT * U300_GPIO_NUM_PORTS - 1) +#endif + +#ifdef U300_COH901571_3 +/* + * Control Register 32bit (R/W) + * bit 15-9 (mask 0x0000FE00) contains the number of cores. 8*cores + * gives the number of GPIO pins. + * bit 8-2 (mask 0x000001FC) contains the core version ID. + */ +#define U300_GPIO_CR (0x00) +#define U300_GPIO_CR_SYNC_SEL_ENABLE (0x00000002UL) +#define U300_GPIO_CR_BLOCK_CLKRQ_ENABLE (0x00000001UL) +#define U300_GPIO_PORTX_SPACING (0x30) +/* Port X Pin Data INPUT Register 32bit (R/W) */ +#define U300_GPIO_PXPDIR (0x04) +/* Port X Pin Data OUTPUT Register 32bit (R/W) */ +#define U300_GPIO_PXPDOR (0x08) +/* Port X Pin Config Register 32bit (R/W) */ +#define U300_GPIO_PXPCR (0x0C) +#define U300_GPIO_PXPCR_ALL_PINS_MODE_MASK (0x0000FFFFUL) +#define U300_GPIO_PXPCR_PIN_MODE_MASK (0x00000003UL) +#define U300_GPIO_PXPCR_PIN_MODE_SHIFT (0x00000002UL) +#define U300_GPIO_PXPCR_PIN_MODE_INPUT (0x00000000UL) +#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_PUSH_PULL (0x00000001UL) +#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_DRAIN (0x00000002UL) +#define U300_GPIO_PXPCR_PIN_MODE_OUTPUT_OPEN_SOURCE (0x00000003UL) +/* Port X Pull-up Enable Register 32bit (R/W) */ +#define U300_GPIO_PXPER (0x10) +#define U300_GPIO_PXPER_ALL_PULL_UP_DISABLE_MASK (0x000000FFUL) +#define U300_GPIO_PXPER_PULL_UP_DISABLE (0x00000001UL) +/* Port X Interrupt Event Register 32bit (R/W) */ +#define U300_GPIO_PXIEV (0x14) +#define U300_GPIO_PXIEV_ALL_IRQ_EVENT_MASK (0x000000FFUL) +#define U300_GPIO_PXIEV_IRQ_EVENT (0x00000001UL) +/* Port X Interrupt Enable Register 32bit (R/W) */ +#define U300_GPIO_PXIEN (0x18) +#define U300_GPIO_PXIEN_ALL_IRQ_ENABLE_MASK (0x000000FFUL) +#define U300_GPIO_PXIEN_IRQ_ENABLE (0x00000001UL) +/* Port X Interrupt Force Register 32bit (R/W) */ +#define U300_GPIO_PXIFR (0x1C) +#define U300_GPIO_PXIFR_ALL_IRQ_FORCE_MASK (0x000000FFUL) +#define U300_GPIO_PXIFR_IRQ_FORCE (0x00000001UL) +/* Port X Interrupt Config Register 32bit (R/W) */ +#define U300_GPIO_PXICR (0x20) +#define U300_GPIO_PXICR_ALL_IRQ_CONFIG_MASK (0x000000FFUL) +#define U300_GPIO_PXICR_IRQ_CONFIG_MASK (0x00000001UL) +#define U300_GPIO_PXICR_IRQ_CONFIG_FALLING_EDGE (0x00000000UL) +#define U300_GPIO_PXICR_IRQ_CONFIG_RISING_EDGE (0x00000001UL) +#ifdef CONFIG_MACH_U300_BS335 +/* seven ports of 8 bits each = GPIO pins 0..55 */ +#define U300_GPIO_NUM_PORTS 7 +#else +/* five ports of 8 bits each = GPIO pins 0..39 */ +#define U300_GPIO_NUM_PORTS 5 +#endif +#define U300_GPIO_PINS_PER_PORT 8 +#define U300_GPIO_MAX (U300_GPIO_PINS_PER_PORT * U300_GPIO_NUM_PORTS - 1) +#endif + +/* + * Individual pin assignments for the B26/S26. Notice that the + * actual usage of these pins depends on the PAD MUX settings, that + * is why the same number can potentially appear several times. + * In the reference design each pin is only used for one purpose. + * These were determined by inspecting the B26/S26 schematic: + * 2/1911-ROA 128 1603 + */ +#ifdef CONFIG_MACH_U300_BS2X +#define U300_GPIO_PIN_UART_RX 0 +#define U300_GPIO_PIN_UART_TX 1 +#define U300_GPIO_PIN_GPIO02 2 /* Unrouted */ +#define U300_GPIO_PIN_GPIO03 3 /* Unrouted */ +#define U300_GPIO_PIN_CAM_SLEEP 4 +#define U300_GPIO_PIN_CAM_REG_EN 5 +#define U300_GPIO_PIN_GPIO06 6 /* Unrouted */ +#define U300_GPIO_PIN_GPIO07 7 /* Unrouted */ + +#define U300_GPIO_PIN_GPIO08 8 /* Service point SP2321 */ +#define U300_GPIO_PIN_GPIO09 9 /* Service point SP2322 */ +#define U300_GPIO_PIN_PHFSENSE 10 /* Headphone jack sensing */ +#define U300_GPIO_PIN_MMC_CLKRET 11 /* Clock return from MMC/SD card */ +#define U300_GPIO_PIN_MMC_CD 12 /* MMC Card insertion detection */ +#define U300_GPIO_PIN_FLIPSENSE 13 /* Mechanical flip sensing */ +#define U300_GPIO_PIN_GPIO14 14 /* DSP JTAG Port RTCK */ +#define U300_GPIO_PIN_GPIO15 15 /* Unrouted */ + +#define U300_GPIO_PIN_GPIO16 16 /* Unrouted */ +#define U300_GPIO_PIN_GPIO17 17 /* Unrouted */ +#define U300_GPIO_PIN_GPIO18 18 /* Unrouted */ +#define U300_GPIO_PIN_GPIO19 19 /* Unrouted */ +#define U300_GPIO_PIN_GPIO20 20 /* Unrouted */ +#define U300_GPIO_PIN_GPIO21 21 /* Unrouted */ +#define U300_GPIO_PIN_GPIO22 22 /* Unrouted */ +#define U300_GPIO_PIN_GPIO23 23 /* Unrouted */ +#endif + +/* + * Individual pin assignments for the B330/S330 and B365/S365. + * Notice that the actual usage of these pins depends on the + * PAD MUX settings, that is why the same number can potentially + * appear several times. In the reference design each pin is only + * used for one purpose. These were determined by inspecting the + * S365 schematic. + */ +#if defined(CONFIG_MACH_U300_BS330) || defined(CONFIG_MACH_U300_BS365) || \ + defined(CONFIG_MACH_U300_BS335) +#define U300_GPIO_PIN_UART_RX 0 +#define U300_GPIO_PIN_UART_TX 1 +#define U300_GPIO_PIN_UART_CTS 2 +#define U300_GPIO_PIN_UART_RTS 3 +#define U300_GPIO_PIN_CAM_MAIN_STANDBY 4 /* Camera MAIN standby */ +#define U300_GPIO_PIN_GPIO05 5 /* Unrouted */ +#define U300_GPIO_PIN_MS_CD 6 /* Memory Stick Card insertion */ +#define U300_GPIO_PIN_GPIO07 7 /* Test point TP2430 */ + +#define U300_GPIO_PIN_GPIO08 8 /* Test point TP2437 */ +#define U300_GPIO_PIN_GPIO09 9 /* Test point TP2431 */ +#define U300_GPIO_PIN_GPIO10 10 /* Test point TP2432 */ +#define U300_GPIO_PIN_MMC_CLKRET 11 /* Clock return from MMC/SD card */ +#define U300_GPIO_PIN_MMC_CD 12 /* MMC Card insertion detection */ +#define U300_GPIO_PIN_CAM_SUB_STANDBY 13 /* Camera SUB standby */ +#define U300_GPIO_PIN_GPIO14 14 /* Test point TP2436 */ +#define U300_GPIO_PIN_GPIO15 15 /* Unrouted */ + +#define U300_GPIO_PIN_GPIO16 16 /* Test point TP2438 */ +#define U300_GPIO_PIN_PHFSENSE 17 /* Headphone jack sensing */ +#define U300_GPIO_PIN_GPIO18 18 /* Test point TP2439 */ +#define U300_GPIO_PIN_GPIO19 19 /* Routed somewhere */ +#define U300_GPIO_PIN_GPIO20 20 /* Unrouted */ +#define U300_GPIO_PIN_GPIO21 21 /* Unrouted */ +#define U300_GPIO_PIN_GPIO22 22 /* Unrouted */ +#define U300_GPIO_PIN_GPIO23 23 /* Unrouted */ + +#define U300_GPIO_PIN_GPIO24 24 /* Unrouted */ +#define U300_GPIO_PIN_GPIO25 25 /* Unrouted */ +#define U300_GPIO_PIN_GPIO26 26 /* Unrouted */ +#define U300_GPIO_PIN_GPIO27 27 /* Unrouted */ +#define U300_GPIO_PIN_GPIO28 28 /* Unrouted */ +#define U300_GPIO_PIN_GPIO29 29 /* Unrouted */ +#define U300_GPIO_PIN_GPIO30 30 /* Unrouted */ +#define U300_GPIO_PIN_GPIO31 31 /* Unrouted */ + +#define U300_GPIO_PIN_GPIO32 32 /* Unrouted */ +#define U300_GPIO_PIN_GPIO33 33 /* Unrouted */ +#define U300_GPIO_PIN_GPIO34 34 /* Unrouted */ +#define U300_GPIO_PIN_GPIO35 35 /* Unrouted */ +#define U300_GPIO_PIN_GPIO36 36 /* Unrouted */ +#define U300_GPIO_PIN_GPIO37 37 /* Unrouted */ +#define U300_GPIO_PIN_GPIO38 38 /* Unrouted */ +#define U300_GPIO_PIN_GPIO39 39 /* Unrouted */ + +#ifdef CONFIG_MACH_U300_BS335 + +#define U300_GPIO_PIN_GPIO40 40 /* Unrouted */ +#define U300_GPIO_PIN_GPIO41 41 /* Unrouted */ +#define U300_GPIO_PIN_GPIO42 42 /* Unrouted */ +#define U300_GPIO_PIN_GPIO43 43 /* Unrouted */ +#define U300_GPIO_PIN_GPIO44 44 /* Unrouted */ +#define U300_GPIO_PIN_GPIO45 45 /* Unrouted */ +#define U300_GPIO_PIN_GPIO46 46 /* Unrouted */ +#define U300_GPIO_PIN_GPIO47 47 /* Unrouted */ + +#define U300_GPIO_PIN_GPIO48 48 /* Unrouted */ +#define U300_GPIO_PIN_GPIO49 49 /* Unrouted */ +#define U300_GPIO_PIN_GPIO50 50 /* Unrouted */ +#define U300_GPIO_PIN_GPIO51 51 /* Unrouted */ +#define U300_GPIO_PIN_GPIO52 52 /* Unrouted */ +#define U300_GPIO_PIN_GPIO53 53 /* Unrouted */ +#define U300_GPIO_PIN_GPIO54 54 /* Unrouted */ +#define U300_GPIO_PIN_GPIO55 55 /* Unrouted */ +#endif + +#endif + +/* translates a pin number to a port number */ +#define PIN_TO_PORT(val) (val >> 3) + +/* These can be found in arch/arm/mach-u300/gpio.c */ +extern int gpio_request(unsigned gpio, const char *label); +extern void gpio_free(unsigned gpio); +extern int gpio_direction_input(unsigned gpio); +extern int gpio_direction_output(unsigned gpio, int value); +extern int gpio_register_callback(unsigned gpio, + int (*func)(void *arg), + void *); +extern int gpio_unregister_callback(unsigned gpio); +extern void enable_irq_on_gpio_pin(unsigned gpio, int edge); +extern void disable_irq_on_gpio_pin(unsigned gpio); +extern void gpio_pullup(unsigned gpio, int value); +extern int gpio_get_value(unsigned gpio); +extern void gpio_set_value(unsigned gpio, int value); + +/* wrappers to sleep-enable the previous two functions */ +static inline unsigned gpio_to_irq(unsigned gpio) +{ + return PIN_TO_PORT(gpio) + IRQ_U300_GPIO_PORT0; +} + +static inline unsigned irq_to_gpio(unsigned irq) +{ + /* + * FIXME: This is no 1-1 mapping at all, it points to the + * whole block of 8 pins. + */ + return (irq - IRQ_U300_GPIO_PORT0) << 3; +} + +#endif diff --git a/arch/arm/mach-u300/padmux.c b/arch/arm/mach-u300/padmux.c new file mode 100644 index 0000000..f366456 --- /dev/null +++ b/arch/arm/mach-u300/padmux.c @@ -0,0 +1,58 @@ +/* + * + * arch/arm/mach-u300/padmux.c + * + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * U300 PADMUX functions + * Author: Linus Walleij + * + */ +#include +#include +#include +#include + +#include "padmux.h" + +/* Set the PAD MUX to route the MMC reader correctly to GPIO0. */ +void pmx_set_mission_mode_mmc(void) +{ + u16 val; + + val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1LR); + val &= ~U300_SYSCON_PMC1LR_MMCSD_MASK; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1LR); + val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); + val &= ~U300_SYSCON_PMC1HR_APP_GPIO_1_MASK; + val |= U300_SYSCON_PMC1HR_APP_GPIO_1_MMC; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); +} + +void pmx_set_mission_mode_spi(void) +{ + u16 val; + + /* Set up padmuxing so the SPI port and its chipselects are active */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); + /* + * Activate the SPI port (disable the use of these pins for generic + * GPIO, DSP, AAIF + */ + val &= ~U300_SYSCON_PMC1HR_APP_SPI_2_MASK; + val |= U300_SYSCON_PMC1HR_APP_SPI_2_SPI; + /* + * Use GPIO pin SPI CS1 for CS1 actually (it can be used for other + * things also) + */ + val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_1_MASK; + val |= U300_SYSCON_PMC1HR_APP_SPI_CS_1_SPI; + /* + * Use GPIO pin SPI CS2 for CS2 actually (it can be used for other + * things also) + */ + val &= ~U300_SYSCON_PMC1HR_APP_SPI_CS_2_MASK; + val |= U300_SYSCON_PMC1HR_APP_SPI_CS_2_SPI; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMC1HR); +} diff --git a/arch/arm/mach-u300/padmux.h b/arch/arm/mach-u300/padmux.h new file mode 100644 index 0000000..8c2099a --- /dev/null +++ b/arch/arm/mach-u300/padmux.h @@ -0,0 +1,19 @@ +/* + * + * arch/arm/mach-u300/padmux.h + * + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * U300 PADMUX API + * Author: Linus Walleij + * + */ + +#ifndef __MACH_U300_PADMUX_H +#define __MACH_U300_PADMUX_H + +void pmx_set_mission_mode_mmc(void); +void pmx_set_mission_mode_spi(void); + +#endif -- cgit v0.10.2 From cd27e485410aa7e7464b0126d968fe8c2a5c045b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 23 Apr 2009 10:21:30 +0100 Subject: [ARM] 5474/1: U300 clocking framework This adds the clocking framework and hooks into the clkdevice for U300 series platforms. Signed-off-by: Linus Walleij Signed-off-by: Russell King diff --git a/arch/arm/mach-u300/clock.c b/arch/arm/mach-u300/clock.c new file mode 100644 index 0000000..d6f8232 --- /dev/null +++ b/arch/arm/mach-u300/clock.c @@ -0,0 +1,1487 @@ +/* + * + * arch/arm/mach-u300/clock.c + * + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Define clocks in the app platform. + * Author: Linus Walleij + * Author: Jonas Aaberg + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "clock.h" + +/* + * TODO: + * - move all handling of the CCR register into this file and create + * a spinlock for the CCR register + * - switch to the clkdevice lookup mechanism that maps clocks to + * device ID:s instead when it becomes available in kernel 2.6.29. + * - implement rate get/set for all clocks that need it. + */ + +/* + * Syscon clock I/O registers lock so clock requests don't collide + * NOTE: this is a local lock only used to lock access to clock and + * reset registers in syscon. + */ +static DEFINE_SPINLOCK(syscon_clkreg_lock); +static DEFINE_SPINLOCK(syscon_resetreg_lock); + +/* + * The clocking hierarchy currently looks like this. + * NOTE: the idea is NOT to show how the clocks are routed on the chip! + * The ideas is to show dependencies, so a clock higher up in the + * hierarchy has to be on in order for another clock to be on. Now, + * both CPU and DMA can actually be on top of the hierarchy, and that + * is not modeled currently. Instead we have the backbone AMBA bus on + * top. This bus cannot be programmed in any way but conceptually it + * needs to be active for the bridges and devices to transport data. + * + * Please be aware that a few clocks are hw controlled, which mean that + * the hw itself can turn on/off or change the rate of the clock when + * needed! + * + * AMBA bus + * | + * +- CPU + * +- NANDIF NAND Flash interface + * +- SEMI Shared Memory interface + * +- ISP Image Signal Processor (U335 only) + * +- CDS (U335 only) + * +- DMA Direct Memory Access Controller + * +- AAIF APP/ACC Inteface (Mobile Scalable Link, MSL) + * +- APEX + * +- VIDEO_ENC AVE2/3 Video Encoder + * +- XGAM Graphics Accelerator Controller + * +- AHB + * | + * +- ahb:0 AHB Bridge + * | | + * | +- ahb:1 INTCON Interrupt controller + * | +- ahb:3 MSPRO Memory Stick Pro controller + * | +- ahb:4 EMIF External Memory interface + * | + * +- fast:0 FAST bridge + * | | + * | +- fast:1 MMCSD MMC/SD card reader controller + * | +- fast:2 I2S0 PCM I2S channel 0 controller + * | +- fast:3 I2S1 PCM I2S channel 1 controller + * | +- fast:4 I2C0 I2C channel 0 controller + * | +- fast:5 I2C1 I2C channel 1 controller + * | +- fast:6 SPI SPI controller + * | +- fast:7 UART1 Secondary UART (U335 only) + * | + * +- slow:0 SLOW bridge + * | + * +- slow:1 SYSCON (not possible to control) + * +- slow:2 WDOG Watchdog + * +- slow:3 UART0 primary UART + * +- slow:4 TIMER_APP Application timer - used in Linux + * +- slow:5 KEYPAD controller + * +- slow:6 GPIO controller + * +- slow:7 RTC controller + * +- slow:8 BT Bus Tracer (not used currently) + * +- slow:9 EH Event Handler (not used currently) + * +- slow:a TIMER_ACC Access style timer (not used currently) + * +- slow:b PPM (U335 only, what is that?) + */ + +/* + * Reset control functions. We remember if a block has been + * taken out of reset and don't remove the reset assertion again + * and vice versa. Currently we only remove resets so the + * enablement function is defined out. + */ +static void syscon_block_reset_enable(struct clk *clk) +{ + u16 val; + unsigned long iflags; + + /* Not all blocks support resetting */ + if (!clk->res_reg || !clk->res_mask) + return; + spin_lock_irqsave(&syscon_resetreg_lock, iflags); + val = readw(clk->res_reg); + val |= clk->res_mask; + writew(val, clk->res_reg); + spin_unlock_irqrestore(&syscon_resetreg_lock, iflags); + clk->reset = true; +} + +static void syscon_block_reset_disable(struct clk *clk) +{ + u16 val; + unsigned long iflags; + + /* Not all blocks support resetting */ + if (!clk->res_reg || !clk->res_mask) + return; + spin_lock_irqsave(&syscon_resetreg_lock, iflags); + val = readw(clk->res_reg); + val &= ~clk->res_mask; + writew(val, clk->res_reg); + spin_unlock_irqrestore(&syscon_resetreg_lock, iflags); + clk->reset = false; +} + +int __clk_get(struct clk *clk) +{ + u16 val; + + /* The MMC and MSPRO clocks need some special set-up */ + if (!strcmp(clk->name, "MCLK")) { + /* Set default MMC clock divisor to 18.9 MHz */ + writew(0x0054U, U300_SYSCON_VBASE + U300_SYSCON_MMF0R); + val = readw(U300_SYSCON_VBASE + U300_SYSCON_MMCR); + /* Disable the MMC feedback clock */ + val &= ~U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE; + /* Disable MSPRO frequency */ + val &= ~U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_MMCR); + } + if (!strcmp(clk->name, "MSPRO")) { + val = readw(U300_SYSCON_VBASE + U300_SYSCON_MMCR); + /* Disable the MMC feedback clock */ + val &= ~U300_SYSCON_MMCR_MMC_FB_CLK_SEL_ENABLE; + /* Enable MSPRO frequency */ + val |= U300_SYSCON_MMCR_MSPRO_FREQSEL_ENABLE; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_MMCR); + } + return 1; +} +EXPORT_SYMBOL(__clk_get); + +void __clk_put(struct clk *clk) +{ +} +EXPORT_SYMBOL(__clk_put); + +static void syscon_clk_disable(struct clk *clk) +{ + unsigned long iflags; + + /* Don't touch the hardware controlled clocks */ + if (clk->hw_ctrld) + return; + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + writew(clk->clk_val, U300_SYSCON_VBASE + U300_SYSCON_SBCDR); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} + +static void syscon_clk_enable(struct clk *clk) +{ + unsigned long iflags; + + /* Don't touch the hardware controlled clocks */ + if (clk->hw_ctrld) + return; + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + writew(clk->clk_val, U300_SYSCON_VBASE + U300_SYSCON_SBCER); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} + +static u16 syscon_clk_get_rate(void) +{ + u16 val; + unsigned long iflags; + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR); + val &= U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK; + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); + return val; +} + +#ifdef CONFIG_MACH_U300_USE_I2S_AS_MASTER +static void enable_i2s0_vcxo(void) +{ + u16 val; + unsigned long iflags; + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + /* Set I2S0 to use the VCXO 26 MHz clock */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR); + val |= U300_SYSCON_CCR_TURN_VCXO_ON; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + val |= U300_SYSCON_CCR_I2S0_USE_VCXO; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR); + val |= U300_SYSCON_CEFR_I2S0_CLK_EN; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} + +static void enable_i2s1_vcxo(void) +{ + u16 val; + unsigned long iflags; + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + /* Set I2S1 to use the VCXO 26 MHz clock */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR); + val |= U300_SYSCON_CCR_TURN_VCXO_ON; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + val |= U300_SYSCON_CCR_I2S1_USE_VCXO; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR); + val |= U300_SYSCON_CEFR_I2S1_CLK_EN; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} + +static void disable_i2s0_vcxo(void) +{ + u16 val; + unsigned long iflags; + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + /* Disable I2S0 use of the VCXO 26 MHz clock */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR); + val &= ~U300_SYSCON_CCR_I2S0_USE_VCXO; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + /* Deactivate VCXO if noone else is using VCXO */ + if (!(val & U300_SYSCON_CCR_I2S1_USE_VCXO)) + val &= ~U300_SYSCON_CCR_TURN_VCXO_ON; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR); + val &= ~U300_SYSCON_CEFR_I2S0_CLK_EN; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} + +static void disable_i2s1_vcxo(void) +{ + u16 val; + unsigned long iflags; + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + /* Disable I2S1 use of the VCXO 26 MHz clock */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR); + val &= ~U300_SYSCON_CCR_I2S1_USE_VCXO; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + /* Deactivate VCXO if noone else is using VCXO */ + if (!(val & U300_SYSCON_CCR_I2S0_USE_VCXO)) + val &= ~U300_SYSCON_CCR_TURN_VCXO_ON; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CEFR); + val &= ~U300_SYSCON_CEFR_I2S0_CLK_EN; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CEFR); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} +#endif /* CONFIG_MACH_U300_USE_I2S_AS_MASTER */ + + +static void syscon_clk_rate_set_mclk(unsigned long rate) +{ + u16 val; + u32 reg; + unsigned long iflags; + + switch (rate) { + case 18900000: + val = 0x0054; + break; + case 20800000: + val = 0x0044; + break; + case 23100000: + val = 0x0043; + break; + case 26000000: + val = 0x0033; + break; + case 29700000: + val = 0x0032; + break; + case 34700000: + val = 0x0022; + break; + case 41600000: + val = 0x0021; + break; + case 52000000: + val = 0x0011; + break; + case 104000000: + val = 0x0000; + break; + default: + printk(KERN_ERR "Trying to set MCLK to unknown speed! %ld\n", + rate); + return; + } + + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + reg = readw(U300_SYSCON_VBASE + U300_SYSCON_MMF0R) & + ~U300_SYSCON_MMF0R_MASK; + writew(reg | val, U300_SYSCON_VBASE + U300_SYSCON_MMF0R); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} + +void syscon_clk_rate_set_cpuclk(unsigned long rate) +{ + u16 val; + unsigned long iflags; + + switch (rate) { + case 13000000: + val = U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER; + break; + case 52000000: + val = U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE; + break; + case 104000000: + val = U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH; + break; + case 208000000: + val = U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST; + break; + default: + return; + } + spin_lock_irqsave(&syscon_clkreg_lock, iflags); + val |= readw(U300_SYSCON_VBASE + U300_SYSCON_CCR) & + ~U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK ; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + spin_unlock_irqrestore(&syscon_clkreg_lock, iflags); +} +EXPORT_SYMBOL(syscon_clk_rate_set_cpuclk); + +void clk_disable(struct clk *clk) +{ + unsigned long iflags; + + spin_lock_irqsave(&clk->lock, iflags); + if (clk->usecount > 0 && !(--clk->usecount)) { + /* some blocks lack clocking registers and cannot be disabled */ + if (clk->disable) + clk->disable(clk); + if (likely((u32)clk->parent)) + clk_disable(clk->parent); + } +#ifdef CONFIG_MACH_U300_USE_I2S_AS_MASTER + if (unlikely(!strcmp(clk->name, "I2S0"))) + disable_i2s0_vcxo(); + if (unlikely(!strcmp(clk->name, "I2S1"))) + disable_i2s1_vcxo(); +#endif + spin_unlock_irqrestore(&clk->lock, iflags); +} +EXPORT_SYMBOL(clk_disable); + +int clk_enable(struct clk *clk) +{ + int ret = 0; + unsigned long iflags; + + spin_lock_irqsave(&clk->lock, iflags); + if (clk->usecount++ == 0) { + if (likely((u32)clk->parent)) + ret = clk_enable(clk->parent); + + if (unlikely(ret != 0)) + clk->usecount--; + else { + /* remove reset line (we never enable reset again) */ + syscon_block_reset_disable(clk); + /* clocks without enable function are always on */ + if (clk->enable) + clk->enable(clk); +#ifdef CONFIG_MACH_U300_USE_I2S_AS_MASTER + if (unlikely(!strcmp(clk->name, "I2S0"))) + enable_i2s0_vcxo(); + if (unlikely(!strcmp(clk->name, "I2S1"))) + enable_i2s1_vcxo(); +#endif + } + } + spin_unlock_irqrestore(&clk->lock, iflags); + return ret; + +} +EXPORT_SYMBOL(clk_enable); + +/* Returns the clock rate in Hz */ +static unsigned long clk_get_rate_cpuclk(struct clk *clk) +{ + u16 val; + + val = syscon_clk_get_rate(); + + switch (val) { + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW: + return 13000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE: + return 52000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH: + return 104000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST: + return 208000000; + default: + break; + } + return clk->rate; +} + +static unsigned long clk_get_rate_ahb_clk(struct clk *clk) +{ + u16 val; + + val = syscon_clk_get_rate(); + + switch (val) { + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW: + return 6500000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE: + return 26000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST: + return 52000000; + default: + break; + } + return clk->rate; + +} + +static unsigned long clk_get_rate_emif_clk(struct clk *clk) +{ + u16 val; + + val = syscon_clk_get_rate(); + + switch (val) { + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW: + return 13000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE: + return 52000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST: + return 104000000; + default: + break; + } + return clk->rate; + +} + +static unsigned long clk_get_rate_xgamclk(struct clk *clk) +{ + u16 val; + + val = syscon_clk_get_rate(); + + switch (val) { + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW: + return 6500000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE: + return 26000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST: + return 52000000; + default: + break; + } + + return clk->rate; +} + +static unsigned long clk_get_rate_mclk(struct clk *clk) +{ + u16 val; + + val = syscon_clk_get_rate(); + + switch (val) { + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER: + /* + * Here, the 208 MHz PLL gets shut down and the always + * on 13 MHz PLL used for RTC etc kicks into use + * instead. + */ + return 13000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST: + { + /* + * This clock is under program control. The register is + * divided in two nybbles, bit 7-4 gives cycles-1 to count + * high, bit 3-0 gives cycles-1 to count low. Distribute + * these with no more than 1 cycle difference between + * low and high and add low and high to get the actual + * divisor. The base PLL is 208 MHz. Writing 0x00 will + * divide by 1 and 1 so the highest frequency possible + * is 104 MHz. + * + * e.g. 0x54 => + * f = 208 / ((5+1) + (4+1)) = 208 / 11 = 18.9 MHz + */ + u16 val = readw(U300_SYSCON_VBASE + U300_SYSCON_MMF0R) & + U300_SYSCON_MMF0R_MASK; + switch (val) { + case 0x0054: + return 18900000; + case 0x0044: + return 20800000; + case 0x0043: + return 23100000; + case 0x0033: + return 26000000; + case 0x0032: + return 29700000; + case 0x0022: + return 34700000; + case 0x0021: + return 41600000; + case 0x0011: + return 52000000; + case 0x0000: + return 104000000; + default: + break; + } + } + default: + break; + } + + return clk->rate; +} + +static unsigned long clk_get_rate_i2s_i2c_spi(struct clk *clk) +{ + u16 val; + + val = syscon_clk_get_rate(); + + switch (val) { + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW_POWER: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_LOW: + return 13000000; + case U300_SYSCON_CCR_CLKING_PERFORMANCE_INTERMEDIATE: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_HIGH: + case U300_SYSCON_CCR_CLKING_PERFORMANCE_BEST: + return 26000000; + default: + break; + } + + return clk->rate; +} + +unsigned long clk_get_rate(struct clk *clk) +{ + if (clk->get_rate) + return clk->get_rate(clk); + else + return clk->rate; +} +EXPORT_SYMBOL(clk_get_rate); + +static unsigned long clk_round_rate_mclk(struct clk *clk, unsigned long rate) +{ + if (rate >= 18900000) + return 18900000; + if (rate >= 20800000) + return 20800000; + if (rate >= 23100000) + return 23100000; + if (rate >= 26000000) + return 26000000; + if (rate >= 29700000) + return 29700000; + if (rate >= 34700000) + return 34700000; + if (rate >= 41600000) + return 41600000; + if (rate >= 52000000) + return 52000000; + return -EINVAL; +} + +static unsigned long clk_round_rate_cpuclk(struct clk *clk, unsigned long rate) +{ + if (rate >= 13000000) + return 13000000; + if (rate >= 52000000) + return 52000000; + if (rate >= 104000000) + return 104000000; + if (rate >= 208000000) + return 208000000; + return -EINVAL; +} + +/* + * This adjusts a requested rate to the closest exact rate + * a certain clock can provide. For a fixed clock it's + * mostly clk->rate. + */ +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + /* TODO: get apropriate switches for EMIFCLK, AHBCLK and MCLK */ + /* Else default to fixed value */ + + if (clk->round_rate) { + return (long) clk->round_rate(clk, rate); + } else { + printk(KERN_ERR "clock: Failed to round rate of %s\n", + clk->name); + } + return (long) clk->rate; +} +EXPORT_SYMBOL(clk_round_rate); + +static int clk_set_rate_mclk(struct clk *clk, unsigned long rate) +{ + syscon_clk_rate_set_mclk(clk_round_rate(clk, rate)); + return 0; +} + +static int clk_set_rate_cpuclk(struct clk *clk, unsigned long rate) +{ + syscon_clk_rate_set_cpuclk(clk_round_rate(clk, rate)); + return 0; +} + +int clk_set_rate(struct clk *clk, unsigned long rate) +{ + /* TODO: set for EMIFCLK and AHBCLK */ + /* Else assume the clock is fixed and fail */ + if (clk->set_rate) { + return clk->set_rate(clk, rate); + } else { + printk(KERN_ERR "clock: Failed to set %s to %ld hz\n", + clk->name, rate); + return -1; + } +} +EXPORT_SYMBOL(clk_set_rate); + +/* + * Clock definitions. The clock parents are set to respective + * bridge and the clock framework makes sure that the clocks have + * parents activated and are brought out of reset when in use. + * + * Clocks that have hw_ctrld = true are hw controlled, and the hw + * can by itself turn these clocks on and off. + * So in other words, we don't really have to care about them. + */ + +static struct clk amba_clk = { + .name = "AMBA", + .rate = 52000000, /* this varies! */ + .hw_ctrld = true, + .reset = false, +}; + +/* + * These blocks are connected directly to the AMBA bus + * with no bridge. + */ + +static struct clk cpu_clk = { + .name = "CPU", + .parent = &amba_clk, + .rate = 208000000, /* this varies! */ + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_CPU_RESET_EN, + .set_rate = clk_set_rate_cpuclk, + .get_rate = clk_get_rate_cpuclk, + .round_rate = clk_round_rate_cpuclk, +}; + +static struct clk nandif_clk = { + .name = "NANDIF", + .parent = &amba_clk, + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_NANDIF_RESET_EN, + .clk_val = U300_SYSCON_SBCER_NANDIF_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk semi_clk = { + .name = "SEMI", + .parent = &amba_clk, + .rate = 0, /* FIXME */ + /* It is not possible to reset SEMI */ + .hw_ctrld = false, + .reset = false, + .clk_val = U300_SYSCON_SBCER_SEMI_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +#ifdef CONFIG_MACH_U300_BS335 +static struct clk isp_clk = { + .name = "ISP", + .parent = &amba_clk, + .rate = 0, /* FIXME */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_ISP_RESET_EN, + .clk_val = U300_SYSCON_SBCER_ISP_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk cds_clk = { + .name = "CDS", + .parent = &amba_clk, + .rate = 0, /* FIXME */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_CDS_RESET_EN, + .clk_val = U300_SYSCON_SBCER_CDS_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; +#endif + +static struct clk dma_clk = { + .name = "DMA", + .parent = &amba_clk, + .rate = 52000000, /* this varies! */ + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_DMAC_RESET_EN, + .clk_val = U300_SYSCON_SBCER_DMAC_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk aaif_clk = { + .name = "AAIF", + .parent = &amba_clk, + .rate = 52000000, /* this varies! */ + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_AAIF_RESET_EN, + .clk_val = U300_SYSCON_SBCER_AAIF_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk apex_clk = { + .name = "APEX", + .parent = &amba_clk, + .rate = 0, /* FIXME */ + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_APEX_RESET_EN, + .clk_val = U300_SYSCON_SBCER_APEX_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk video_enc_clk = { + .name = "VIDEO_ENC", + .parent = &amba_clk, + .rate = 208000000, /* this varies! */ + .hw_ctrld = false, + .reset = false, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + /* This has XGAM in the name but refers to the video encoder */ + .res_mask = U300_SYSCON_RRR_XGAM_VC_SYNC_RESET_EN, + .clk_val = U300_SYSCON_SBCER_VIDEO_ENC_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk xgam_clk = { + .name = "XGAMCLK", + .parent = &amba_clk, + .rate = 52000000, /* this varies! */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_XGAM_RESET_EN, + .clk_val = U300_SYSCON_SBCER_XGAM_CLK_EN, + .get_rate = clk_get_rate_xgamclk, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +/* This clock is used to activate the video encoder */ +static struct clk ahb_clk = { + .name = "AHB", + .parent = &amba_clk, + .rate = 52000000, /* this varies! */ + .hw_ctrld = false, /* This one is set to false due to HW bug */ + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_AHB_RESET_EN, + .clk_val = U300_SYSCON_SBCER_AHB_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_ahb_clk, +}; + + +/* + * Clocks on the AHB bridge + */ + +static struct clk ahb_subsys_clk = { + .name = "AHB_SUBSYS", + .parent = &amba_clk, + .rate = 52000000, /* this varies! */ + .hw_ctrld = true, + .reset = false, + .clk_val = U300_SYSCON_SBCER_AHB_SUBSYS_BRIDGE_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_ahb_clk, +}; + +static struct clk intcon_clk = { + .name = "INTCON", + .parent = &ahb_subsys_clk, + .rate = 52000000, /* this varies! */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_INTCON_RESET_EN, + /* INTCON can be reset but not clock-gated */ +}; + +static struct clk mspro_clk = { + .name = "MSPRO", + .parent = &ahb_subsys_clk, + .rate = 0, /* FIXME */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_MSPRO_RESET_EN, + .clk_val = U300_SYSCON_SBCER_MSPRO_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk emif_clk = { + .name = "EMIF", + .parent = &ahb_subsys_clk, + .rate = 104000000, /* this varies! */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RRR, + .res_mask = U300_SYSCON_RRR_EMIF_RESET_EN, + .clk_val = U300_SYSCON_SBCER_EMIF_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_emif_clk, +}; + + +/* + * Clocks on the FAST bridge + */ +static struct clk fast_clk = { + .name = "FAST_BRIDGE", + .parent = &amba_clk, + .rate = 13000000, /* this varies! */ + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_FAST_BRIDGE_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_FAST_BRIDGE_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk mmcsd_clk = { + .name = "MCLK", + .parent = &fast_clk, + .rate = 18900000, /* this varies! */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_MMC_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_MMC_CLK_EN, + .get_rate = clk_get_rate_mclk, + .set_rate = clk_set_rate_mclk, + .round_rate = clk_round_rate_mclk, + .disable = syscon_clk_disable, + .enable = syscon_clk_enable, +}; + +static struct clk i2s0_clk = { + .name = "i2s0", + .parent = &fast_clk, + .rate = 26000000, /* this varies! */ + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_PCM_I2S0_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_I2S0_CORE_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_i2s_i2c_spi, +}; + +static struct clk i2s1_clk = { + .name = "i2s1", + .parent = &fast_clk, + .rate = 26000000, /* this varies! */ + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_PCM_I2S1_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_I2S1_CORE_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_i2s_i2c_spi, +}; + +static struct clk i2c0_clk = { + .name = "I2C0", + .parent = &fast_clk, + .rate = 26000000, /* this varies! */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_I2C0_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_I2C0_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_i2s_i2c_spi, +}; + +static struct clk i2c1_clk = { + .name = "I2C1", + .parent = &fast_clk, + .rate = 26000000, /* this varies! */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_I2C1_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_I2C1_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_i2s_i2c_spi, +}; + +static struct clk spi_clk = { + .name = "SPI", + .parent = &fast_clk, + .rate = 26000000, /* this varies! */ + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_SPI_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_SPI_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, + .get_rate = clk_get_rate_i2s_i2c_spi, +}; + +#ifdef CONFIG_MACH_U300_BS335 +static struct clk uart1_clk = { + .name = "UART1", + .parent = &fast_clk, + .rate = 13000000, + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RFR, + .res_mask = U300_SYSCON_RFR_UART1_RESET_ENABLE, + .clk_val = U300_SYSCON_SBCER_UART1_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; +#endif + + +/* + * Clocks on the SLOW bridge + */ +static struct clk slow_clk = { + .name = "SLOW_BRIDGE", + .parent = &amba_clk, + .rate = 13000000, + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_SLOW_BRIDGE_RESET_EN, + .clk_val = U300_SYSCON_SBCER_SLOW_BRIDGE_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +/* TODO: implement SYSCON clock? */ + +static struct clk wdog_clk = { + .name = "WDOG", + .parent = &slow_clk, + .hw_ctrld = false, + .rate = 32768, + .reset = false, + /* This is always on, cannot be enabled/disabled or reset */ +}; + +/* This one is hardwired to PLL13 */ +static struct clk uart_clk = { + .name = "UARTCLK", + .parent = &slow_clk, + .rate = 13000000, + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_UART_RESET_EN, + .clk_val = U300_SYSCON_SBCER_UART_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk keypad_clk = { + .name = "KEYPAD", + .parent = &slow_clk, + .rate = 32768, + .hw_ctrld = false, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_KEYPAD_RESET_EN, + .clk_val = U300_SYSCON_SBCER_KEYPAD_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk gpio_clk = { + .name = "GPIO", + .parent = &slow_clk, + .rate = 13000000, + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_GPIO_RESET_EN, + .clk_val = U300_SYSCON_SBCER_GPIO_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk rtc_clk = { + .name = "RTC", + .parent = &slow_clk, + .rate = 32768, + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_RTC_RESET_EN, + /* This clock is always on, cannot be enabled/disabled */ +}; + +static struct clk bustr_clk = { + .name = "BUSTR", + .parent = &slow_clk, + .rate = 13000000, + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_BTR_RESET_EN, + .clk_val = U300_SYSCON_SBCER_BTR_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk evhist_clk = { + .name = "EVHIST", + .parent = &slow_clk, + .rate = 13000000, + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_EH_RESET_EN, + .clk_val = U300_SYSCON_SBCER_EH_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk timer_clk = { + .name = "TIMER", + .parent = &slow_clk, + .rate = 13000000, + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_ACC_TMR_RESET_EN, + .clk_val = U300_SYSCON_SBCER_ACC_TMR_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +static struct clk app_timer_clk = { + .name = "TIMER_APP", + .parent = &slow_clk, + .rate = 13000000, + .hw_ctrld = true, + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_APP_TMR_RESET_EN, + .clk_val = U300_SYSCON_SBCER_APP_TMR_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; + +#ifdef CONFIG_MACH_U300_BS335 +static struct clk ppm_clk = { + .name = "PPM", + .parent = &slow_clk, + .rate = 0, /* FIXME */ + .hw_ctrld = true, /* TODO: Look up if it is hw ctrld or not */ + .reset = true, + .res_reg = U300_SYSCON_VBASE + U300_SYSCON_RSR, + .res_mask = U300_SYSCON_RSR_PPM_RESET_EN, + .clk_val = U300_SYSCON_SBCER_PPM_CLK_EN, + .enable = syscon_clk_enable, + .disable = syscon_clk_disable, +}; +#endif + +#define DEF_LOOKUP(devid, clkref) \ + { \ + .dev_id = devid, \ + .clk = clkref, \ + } + +/* + * Here we only define clocks that are meaningful to + * look up through clockdevice. + */ +static struct clk_lookup lookups[] = { + /* Connected directly to the AMBA bus */ + DEF_LOOKUP("amba", &amba_clk), + DEF_LOOKUP("cpu", &cpu_clk), + DEF_LOOKUP("nandif", &nandif_clk), + DEF_LOOKUP("semi", &semi_clk), +#ifdef CONFIG_MACH_U300_BS335 + DEF_LOOKUP("isp", &isp_clk), + DEF_LOOKUP("cds", &cds_clk), +#endif + DEF_LOOKUP("dma", &dma_clk), + DEF_LOOKUP("aaif", &aaif_clk), + DEF_LOOKUP("apex", &apex_clk), + DEF_LOOKUP("video_enc", &video_enc_clk), + DEF_LOOKUP("xgam", &xgam_clk), + DEF_LOOKUP("ahb", &ahb_clk), + /* AHB bridge clocks */ + DEF_LOOKUP("ahb", &ahb_subsys_clk), + DEF_LOOKUP("intcon", &intcon_clk), + DEF_LOOKUP("mspro", &mspro_clk), + DEF_LOOKUP("pl172", &emif_clk), + /* FAST bridge clocks */ + DEF_LOOKUP("fast", &fast_clk), + DEF_LOOKUP("mmci", &mmcsd_clk), + /* + * The .0 and .1 identifiers on these comes from the platform device + * .id field and are assigned when the platform devices are registered. + */ + DEF_LOOKUP("i2s.0", &i2s0_clk), + DEF_LOOKUP("i2s.1", &i2s1_clk), + DEF_LOOKUP("stddci2c.0", &i2c0_clk), + DEF_LOOKUP("stddci2c.1", &i2c1_clk), + DEF_LOOKUP("pl022", &spi_clk), +#ifdef CONFIG_MACH_U300_BS335 + DEF_LOOKUP("uart1", &uart1_clk), +#endif + /* SLOW bridge clocks */ + DEF_LOOKUP("slow", &slow_clk), + DEF_LOOKUP("wdog", &wdog_clk), + DEF_LOOKUP("uart0", &uart_clk), + DEF_LOOKUP("apptimer", &app_timer_clk), + DEF_LOOKUP("keypad", &keypad_clk), + DEF_LOOKUP("u300-gpio", &gpio_clk), + DEF_LOOKUP("rtc0", &rtc_clk), + DEF_LOOKUP("bustr", &bustr_clk), + DEF_LOOKUP("evhist", &evhist_clk), + DEF_LOOKUP("timer", &timer_clk), +#ifdef CONFIG_MACH_U300_BS335 + DEF_LOOKUP("ppm", &ppm_clk), +#endif +}; + +static void __init clk_register(void) +{ + int i; + + /* Register the lookups */ + for (i = 0; i < ARRAY_SIZE(lookups); i++) + clkdev_add(&lookups[i]); +} + +/* + * These are the clocks for cells registered as primecell drivers + * on the AMBA bus. These must be on during AMBA device registration + * since the bus probe will attempt to read magic configuration + * registers for these devices. If they are deactivated these probes + * will fail. + * + * + * Please note that on emif, both RAM and NAND is connected in dual + * RAM phones. On single RAM phones, ram is on semi and NAND on emif. + * + */ +void u300_clock_primecells(void) +{ + clk_enable(&intcon_clk); + clk_enable(&uart_clk); +#ifdef CONFIG_MACH_U300_BS335 + clk_enable(&uart1_clk); +#endif + clk_enable(&spi_clk); + + clk_enable(&mmcsd_clk); + +} +EXPORT_SYMBOL(u300_clock_primecells); + +void u300_unclock_primecells(void) +{ + + clk_disable(&intcon_clk); + clk_disable(&uart_clk); +#ifdef CONFIG_MACH_U300_BS335 + clk_disable(&uart1_clk); +#endif + clk_disable(&spi_clk); + clk_disable(&mmcsd_clk); + +} +EXPORT_SYMBOL(u300_unclock_primecells); + +/* + * The interrupt controller is enabled before the clock API is registered. + */ +void u300_enable_intcon_clock(void) +{ + clk_enable(&intcon_clk); +} +EXPORT_SYMBOL(u300_enable_intcon_clock); + +/* + * The timer is enabled before the clock API is registered. + */ +void u300_enable_timer_clock(void) +{ + clk_enable(&app_timer_clk); +} +EXPORT_SYMBOL(u300_enable_timer_clock); + +#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG)) +/* + * The following makes it possible to view the status (especially + * reference count and reset status) for the clocks in the platform + * by looking into the special file /u300_clocks + */ + +/* A list of all clocks in the platform */ +static struct clk *clks[] = { + /* Top node clock for the AMBA bus */ + &amba_clk, + /* Connected directly to the AMBA bus */ + &cpu_clk, + &nandif_clk, + &semi_clk, +#ifdef CONFIG_MACH_U300_BS335 + &isp_clk, + &cds_clk, +#endif + &dma_clk, + &aaif_clk, + &apex_clk, + &video_enc_clk, + &xgam_clk, + &ahb_clk, + + /* AHB bridge clocks */ + &ahb_subsys_clk, + &intcon_clk, + &mspro_clk, + &emif_clk, + /* FAST bridge clocks */ + &fast_clk, + &mmcsd_clk, + &i2s0_clk, + &i2s1_clk, + &i2c0_clk, + &i2c1_clk, + &spi_clk, +#ifdef CONFIG_MACH_U300_BS335 + &uart1_clk, +#endif + /* SLOW bridge clocks */ + &slow_clk, + &wdog_clk, + &uart_clk, + &app_timer_clk, + &keypad_clk, + &gpio_clk, + &rtc_clk, + &bustr_clk, + &evhist_clk, + &timer_clk, +#ifdef CONFIG_MACH_U300_BS335 + &ppm_clk, +#endif +}; + +static int u300_clocks_show(struct seq_file *s, void *data) +{ + struct clk *clk; + int i; + + seq_printf(s, "CLOCK DEVICE RESET STATE\t" \ + "ACTIVE\tUSERS\tHW CTRL FREQ\n"); + seq_printf(s, "---------------------------------------------" \ + "-----------------------------------------\n"); + for (i = 0; i < ARRAY_SIZE(clks); i++) { + clk = clks[i]; + if (clk != ERR_PTR(-ENOENT)) { + /* Format clock and device name nicely */ + char cdp[33]; + int chars; + + chars = snprintf(&cdp[0], 17, "%s", clk->name); + while (chars < 16) { + cdp[chars] = ' '; + chars++; + } + chars = snprintf(&cdp[16], 17, "%s", clk->dev ? + dev_name(clk->dev) : "N/A"); + while (chars < 16) { + cdp[chars+16] = ' '; + chars++; + } + cdp[32] = '\0'; + if (clk->get_rate) + seq_printf(s, + "%s%s\t%s\t%d\t%s\t%lu Hz\n", + &cdp[0], + clk->reset ? + "ASSERTED" : "RELEASED", + clk->usecount ? "ON" : "OFF", + clk->usecount, + clk->hw_ctrld ? "YES" : "NO ", + clk->get_rate(clk)); + else + seq_printf(s, + "%s%s\t%s\t%d\t%s\t" \ + "(unknown rate)\n", + &cdp[0], + clk->reset ? + "ASSERTED" : "RELEASED", + clk->usecount ? "ON" : "OFF", + clk->usecount, + clk->hw_ctrld ? "YES" : "NO "); + } + } + return 0; +} + +static int u300_clocks_open(struct inode *inode, struct file *file) +{ + return single_open(file, u300_clocks_show, NULL); +} + +static const struct file_operations u300_clocks_operations = { + .open = u300_clocks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void init_clk_read_procfs(void) +{ + /* Expose a simple debugfs interface to view all clocks */ + (void) debugfs_create_file("u300_clocks", S_IFREG | S_IRUGO, + NULL, NULL, &u300_clocks_operations); +} +#else +static inline void init_clk_read_procfs(void) +{ +} +#endif + +static int __init u300_clock_init(void) +{ + u16 val; + + /* + * FIXME: shall all this powermanagement stuff really live here??? + */ + + /* Set system to run at PLL208, max performance, a known state. */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR); + val &= ~U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + /* Wait for the PLL208 to lock if not locked in yet */ + while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) & + U300_SYSCON_CSR_PLL208_LOCK_IND)); + + /* Power management enable */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_PMCR); + val |= U300_SYSCON_PMCR_PWR_MGNT_ENABLE; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_PMCR); + + clk_register(); + + init_clk_read_procfs(); + + /* + * Some of these may be on when we boot the system so make sure they + * are turned OFF. + */ + syscon_block_reset_enable(&timer_clk); + timer_clk.disable(&timer_clk); + + /* + * These shall be turned on by default when we boot the system + * so make sure they are ON. (Adding CPU here is a bit too much.) + * These clocks will be claimed by drivers later. + */ + syscon_block_reset_disable(&semi_clk); + syscon_block_reset_disable(&emif_clk); + semi_clk.enable(&semi_clk); + emif_clk.enable(&emif_clk); + + return 0; +} +/* initialize clocking early to be available later in the boot */ +core_initcall(u300_clock_init); diff --git a/arch/arm/mach-u300/clock.h b/arch/arm/mach-u300/clock.h new file mode 100644 index 0000000..fc6d9cc --- /dev/null +++ b/arch/arm/mach-u300/clock.h @@ -0,0 +1,53 @@ +/* + * arch/arm/mach-u300/include/mach/clock.h + * + * Copyright (C) 2004 - 2005 Nokia corporation + * Written by Tuukka Tikkanen + * Based on clocks.h by Tony Lindgren, Gordon McNutt and RidgeRun, Inc + * Copyright (C) 2007-2009 ST-Ericsson AB + * Adopted to ST-Ericsson U300 platforms by + * Jonas Aaberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __MACH_CLOCK_H +#define __MACH_CLOCK_H + +#include + +struct clk { + struct list_head node; + struct module *owner; + struct device *dev; + const char *name; + struct clk *parent; + + spinlock_t lock; + unsigned long rate; + bool reset; + __u16 clk_val; + __s8 usecount; + __u32 res_reg; + __u16 res_mask; + + bool hw_ctrld; + + void (*recalc) (struct clk *); + int (*set_rate) (struct clk *, unsigned long); + unsigned long (*get_rate) (struct clk *); + unsigned long (*round_rate) (struct clk *, unsigned long); + void (*init) (struct clk *); + void (*enable) (struct clk *); + void (*disable) (struct clk *); +}; + +void u300_clock_primecells(void); +void u300_unclock_primecells(void); +void u300_enable_intcon_clock(void); +void u300_enable_timer_clock(void); + +#endif diff --git a/arch/arm/mach-u300/include/mach/clkdev.h b/arch/arm/mach-u300/include/mach/clkdev.h new file mode 100644 index 0000000..92e3cc8 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/clkdev.h @@ -0,0 +1,7 @@ +#ifndef __MACH_CLKDEV_H +#define __MACH_CLKDEV_H + +int __clk_get(struct clk *clk); +void __clk_put(struct clk *clk); + +#endif -- cgit v0.10.2 From bb3cee2b35d2b9edab71997bd06040ff37483e08 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 23 Apr 2009 10:22:13 +0100 Subject: [ARM] 5473/1: U300 core machine support This adds core support for the ST-Ericsson U300 series platforms: U300, U330, U335 and U365. Supports memory mappings, interrupt controller, system timer (clocksource and clockevents), and binds to the existing drivers for the PrimeCells used in this design: PL190 (VIC), PL180 (MMC/SD host) and PL011 (UART). This is intented to serve as starting point for our mainling work, more patches to follow. Signed-off-by: Linus Walleij Signed-off-by: Russell King diff --git a/arch/arm/mach-u300/Kconfig b/arch/arm/mach-u300/Kconfig new file mode 100644 index 0000000..337b9aa --- /dev/null +++ b/arch/arm/mach-u300/Kconfig @@ -0,0 +1,105 @@ +if ARCH_U300 + +menu "ST-Ericsson AB U300/U330/U335/U365 Platform" + +comment "ST-Ericsson Mobile Platform Products" + +config MACH_U300 + bool "U300" + +comment "ST-Ericsson U300/U330/U335/U365 Feature Selections" + +choice + prompt "U300/U330/U335/U365 system type" + default MACH_U300_BS2X + ---help--- + You need to select the target system, i.e. the + U300/U330/U335/U365 board that you want to compile your kernel + for. + +config MACH_U300_BS2X + bool "S26/S26/B25/B26 Test Products" + depends on MACH_U300 + help + Select this if you're developing on the + S26/S25 test products. (Also works on + B26/B25 big boards.) + +config MACH_U300_BS330 + bool "S330/B330 Test Products" + depends on MACH_U300 + help + Select this if you're developing on the + S330/B330 test products. + +config MACH_U300_BS335 + bool "S335/B335 Test Products" + depends on MACH_U300 + help + Select this if you're developing on the + S335/B335 test products. + +config MACH_U300_BS365 + bool "S365/B365 Test Products" + depends on MACH_U300 + help + Select this if you're developing on the + S365/B365 test products. + +endchoice + +choice + prompt "Memory configuration" + default MACH_U300_SINGLE_RAM + ---help--- + You have to config the kernel according to the physical memory + configuration. + +config MACH_U300_SINGLE_RAM + bool "Single RAM" + help + Select this if you want support for Single RAM phones. + +config MACH_U300_DUAL_RAM + bool "Dual RAM" + help + Select this if you want support for Dual RAM phones. + This is two RAM memorys on different EMIFs. +endchoice + +config U300_DEBUG + bool "Debug support for U300" + depends on PM + help + Debug support for U300 in sysfs, procfs etc. + +config MACH_U300_SEMI_IS_SHARED + bool "The SEMI is used by both the access and application side" + depends on MACH_U300 + help + This makes it possible to use the SEMI (Shared External + Memory Interface) from both from access and application + side. + +comment "All the settings below must match the bootloader's settings" + +config MACH_U300_ACCESS_MEM_SIZE + int "Access CPU memory allocation" + range 7 25 + depends on MACH_U300_SINGLE_RAM + default 13 + help + How much memory in MiB that the Access side CPU has allocated + +config MACH_U300_2MB_ALIGNMENT_FIX + bool "2MiB alignment fix" + depends on MACH_U300_SINGLE_RAM + default y + help + If yes and the Access side CPU has allocated an odd size in + MiB, this fix gives you one MiB extra that would otherwise be + lost due to Linux 2 MiB alignment policy. + +endmenu + +endif diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile new file mode 100644 index 0000000..24950e0 --- /dev/null +++ b/arch/arm/mach-u300/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for the linux kernel, U300 machine. +# + +obj-y := core.o clock.o timer.o gpio.o padmux.o +obj-m := +obj-n := +obj- := + +obj-$(CONFIG_ARCH_U300) += u300.o +obj-$(CONFIG_MMC) += mmc.o diff --git a/arch/arm/mach-u300/core.c b/arch/arm/mach-u300/core.c new file mode 100644 index 0000000..1f2ed21 --- /dev/null +++ b/arch/arm/mach-u300/core.c @@ -0,0 +1,649 @@ +/* + * + * arch/arm/mach-u300/core.c + * + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Core platform support, IRQ handling and device definitions. + * Author: Linus Walleij + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "clock.h" +#include "mmc.h" + +/* + * Static I/O mappings that are needed for booting the U300 platforms. The + * only things we need are the areas where we find the timer, syscon and + * intcon, since the remaining device drivers will map their own memory + * physical to virtual as the need arise. + */ +static struct map_desc u300_io_desc[] __initdata = { + { + .virtual = U300_SLOW_PER_VIRT_BASE, + .pfn = __phys_to_pfn(U300_SLOW_PER_PHYS_BASE), + .length = SZ_64K, + .type = MT_DEVICE, + }, + { + .virtual = U300_AHB_PER_VIRT_BASE, + .pfn = __phys_to_pfn(U300_AHB_PER_PHYS_BASE), + .length = SZ_32K, + .type = MT_DEVICE, + }, + { + .virtual = U300_FAST_PER_VIRT_BASE, + .pfn = __phys_to_pfn(U300_FAST_PER_PHYS_BASE), + .length = SZ_32K, + .type = MT_DEVICE, + }, + { + .virtual = 0xffff2000, /* TCM memory */ + .pfn = __phys_to_pfn(0xffff2000), + .length = SZ_16K, + .type = MT_DEVICE, + }, + + /* + * This overlaps with the IRQ vectors etc at 0xffff0000, so these + * may have to be moved to 0x00000000 in order to use the ROM. + */ + /* + { + .virtual = U300_BOOTROM_VIRT_BASE, + .pfn = __phys_to_pfn(U300_BOOTROM_PHYS_BASE), + .length = SZ_64K, + .type = MT_ROM, + }, + */ +}; + +void __init u300_map_io(void) +{ + iotable_init(u300_io_desc, ARRAY_SIZE(u300_io_desc)); +} + +/* + * Declaration of devices found on the U300 board and + * their respective memory locations. + */ +static struct amba_device uart0_device = { + .dev = { + .init_name = "uart0", /* Slow device at 0x3000 offset */ + .platform_data = NULL, + }, + .res = { + .start = U300_UART0_BASE, + .end = U300_UART0_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = { IRQ_U300_UART0, NO_IRQ }, +}; + +/* The U335 have an additional UART1 on the APP CPU */ +#ifdef CONFIG_MACH_U300_BS335 +static struct amba_device uart1_device = { + .dev = { + .init_name = "uart1", /* Fast device at 0x7000 offset */ + .platform_data = NULL, + }, + .res = { + .start = U300_UART1_BASE, + .end = U300_UART1_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = { IRQ_U300_UART1, NO_IRQ }, +}; +#endif + +static struct amba_device pl172_device = { + .dev = { + .init_name = "pl172", /* AHB device at 0x4000 offset */ + .platform_data = NULL, + }, + .res = { + .start = U300_EMIF_CFG_BASE, + .end = U300_EMIF_CFG_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, +}; + + +/* + * Everything within this next ifdef deals with external devices connected to + * the APP SPI bus. + */ +static struct amba_device pl022_device = { + .dev = { + .coherent_dma_mask = ~0, + .init_name = "pl022", /* Fast device at 0x6000 offset */ + }, + .res = { + .start = U300_SPI_BASE, + .end = U300_SPI_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = {IRQ_U300_SPI, NO_IRQ }, + /* + * This device has a DMA channel but the Linux driver does not use + * it currently. + */ +}; + +static struct amba_device mmcsd_device = { + .dev = { + .init_name = "mmci", /* Fast device at 0x1000 offset */ + .platform_data = NULL, /* Added later */ + }, + .res = { + .start = U300_MMCSD_BASE, + .end = U300_MMCSD_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + .irq = {IRQ_U300_MMCSD_MCIINTR0, IRQ_U300_MMCSD_MCIINTR1 }, + /* + * This device has a DMA channel but the Linux driver does not use + * it currently. + */ +}; + +/* + * The order of device declaration may be important, since some devices + * have dependencies on other devices being initialized first. + */ +static struct amba_device *amba_devs[] __initdata = { + &uart0_device, +#ifdef CONFIG_MACH_U300_BS335 + &uart1_device, +#endif + &pl022_device, + &pl172_device, + &mmcsd_device, +}; + +/* Here follows a list of all hw resources that the platform devices + * allocate. Note, clock dependencies are not included + */ + +static struct resource gpio_resources[] = { + { + .start = U300_GPIO_BASE, + .end = (U300_GPIO_BASE + SZ_4K - 1), + .flags = IORESOURCE_MEM, + }, + { + .name = "gpio0", + .start = IRQ_U300_GPIO_PORT0, + .end = IRQ_U300_GPIO_PORT0, + .flags = IORESOURCE_IRQ, + }, + { + .name = "gpio1", + .start = IRQ_U300_GPIO_PORT1, + .end = IRQ_U300_GPIO_PORT1, + .flags = IORESOURCE_IRQ, + }, + { + .name = "gpio2", + .start = IRQ_U300_GPIO_PORT2, + .end = IRQ_U300_GPIO_PORT2, + .flags = IORESOURCE_IRQ, + }, +#ifdef U300_COH901571_3 + { + .name = "gpio3", + .start = IRQ_U300_GPIO_PORT3, + .end = IRQ_U300_GPIO_PORT3, + .flags = IORESOURCE_IRQ, + }, + { + .name = "gpio4", + .start = IRQ_U300_GPIO_PORT4, + .end = IRQ_U300_GPIO_PORT4, + .flags = IORESOURCE_IRQ, + }, +#ifdef CONFIG_MACH_U300_BS335 + { + .name = "gpio5", + .start = IRQ_U300_GPIO_PORT5, + .end = IRQ_U300_GPIO_PORT5, + .flags = IORESOURCE_IRQ, + }, + { + .name = "gpio6", + .start = IRQ_U300_GPIO_PORT6, + .end = IRQ_U300_GPIO_PORT6, + .flags = IORESOURCE_IRQ, + }, +#endif /* CONFIG_MACH_U300_BS335 */ +#endif /* U300_COH901571_3 */ +}; + +static struct resource keypad_resources[] = { + { + .start = U300_KEYPAD_BASE, + .end = U300_KEYPAD_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + { + .name = "coh901461-press", + .start = IRQ_U300_KEYPAD_KEYBF, + .end = IRQ_U300_KEYPAD_KEYBF, + .flags = IORESOURCE_IRQ, + }, + { + .name = "coh901461-release", + .start = IRQ_U300_KEYPAD_KEYBR, + .end = IRQ_U300_KEYPAD_KEYBR, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct resource rtc_resources[] = { + { + .start = U300_RTC_BASE, + .end = U300_RTC_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = IRQ_U300_RTC, + .end = IRQ_U300_RTC, + .flags = IORESOURCE_IRQ, + }, +}; + +/* + * Fsmc does have IRQs: #43 and #44 (NFIF and NFIF2) + * but these are not yet used by the driver. + */ +static struct resource fsmc_resources[] = { + { + .start = U300_NAND_IF_PHYS_BASE, + .end = U300_NAND_IF_PHYS_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct resource i2c0_resources[] = { + { + .start = U300_I2C0_BASE, + .end = U300_I2C0_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = IRQ_U300_I2C0, + .end = IRQ_U300_I2C0, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct resource i2c1_resources[] = { + { + .start = U300_I2C1_BASE, + .end = U300_I2C1_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = IRQ_U300_I2C1, + .end = IRQ_U300_I2C1, + .flags = IORESOURCE_IRQ, + }, + +}; + +static struct resource wdog_resources[] = { + { + .start = U300_WDOG_BASE, + .end = U300_WDOG_BASE + SZ_4K - 1, + .flags = IORESOURCE_MEM, + }, + { + .start = IRQ_U300_WDOG, + .end = IRQ_U300_WDOG, + .flags = IORESOURCE_IRQ, + } +}; + +/* TODO: These should be protected by suitable #ifdef's */ +static struct resource ave_resources[] = { + { + .name = "AVE3e I/O Area", + .start = U300_VIDEOENC_BASE, + .end = U300_VIDEOENC_BASE + SZ_512K - 1, + .flags = IORESOURCE_MEM, + }, + { + .name = "AVE3e IRQ0", + .start = IRQ_U300_VIDEO_ENC_0, + .end = IRQ_U300_VIDEO_ENC_0, + .flags = IORESOURCE_IRQ, + }, + { + .name = "AVE3e IRQ1", + .start = IRQ_U300_VIDEO_ENC_1, + .end = IRQ_U300_VIDEO_ENC_1, + .flags = IORESOURCE_IRQ, + }, + { + .name = "AVE3e Physmem Area", + .start = 0, /* 0 will be remapped to reserved memory */ + .end = SZ_1M - 1, + .flags = IORESOURCE_MEM, + }, + /* + * The AVE3e requires two regions of 256MB that it considers + * "invisible". The hardware will not be able to access these + * adresses, so they should never point to system RAM. + */ + { + .name = "AVE3e Reserved 0", + .start = 0xd0000000, + .end = 0xd0000000 + SZ_256M - 1, + .flags = IORESOURCE_MEM, + }, + { + .name = "AVE3e Reserved 1", + .start = 0xe0000000, + .end = 0xe0000000 + SZ_256M - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device wdog_device = { + .name = "wdog", + .id = -1, + .num_resources = ARRAY_SIZE(wdog_resources), + .resource = wdog_resources, +}; + +static struct platform_device i2c0_device = { + .name = "stddci2c", + .id = 0, + .num_resources = ARRAY_SIZE(i2c0_resources), + .resource = i2c0_resources, +}; + +static struct platform_device i2c1_device = { + .name = "stddci2c", + .id = 1, + .num_resources = ARRAY_SIZE(i2c1_resources), + .resource = i2c1_resources, +}; + +static struct platform_device gpio_device = { + .name = "u300-gpio", + .id = -1, + .num_resources = ARRAY_SIZE(gpio_resources), + .resource = gpio_resources, +}; + +static struct platform_device keypad_device = { + .name = "keypad", + .id = -1, + .num_resources = ARRAY_SIZE(keypad_resources), + .resource = keypad_resources, +}; + +static struct platform_device rtc_device = { + .name = "rtc0", + .id = -1, + .num_resources = ARRAY_SIZE(rtc_resources), + .resource = rtc_resources, +}; + +static struct platform_device fsmc_device = { + .name = "nandif", + .id = -1, + .num_resources = ARRAY_SIZE(fsmc_resources), + .resource = fsmc_resources, +}; + +static struct platform_device ave_device = { + .name = "video_enc", + .id = -1, + .num_resources = ARRAY_SIZE(ave_resources), + .resource = ave_resources, +}; + +/* + * Notice that AMBA devices are initialized before platform devices. + * + */ +static struct platform_device *platform_devs[] __initdata = { + &i2c0_device, + &i2c1_device, + &keypad_device, + &rtc_device, + &gpio_device, + &fsmc_device, + &wdog_device, + &ave_device +}; + + +/* + * Interrupts: the U300 platforms have two pl190 ARM PrimeCells connected + * together so some interrupts are connected to the first one and some + * to the second one. + */ +void __init u300_init_irq(void) +{ + u32 mask[2] = {0, 0}; + int i; + + for (i = 0; i < NR_IRQS; i++) + set_bit(i, (unsigned long *) &mask[0]); + u300_enable_intcon_clock(); + vic_init((void __iomem *) U300_INTCON0_VBASE, 0, mask[0]); + vic_init((void __iomem *) U300_INTCON1_VBASE, 32, mask[1]); +} + + +/* + * U300 platforms peripheral handling + */ +struct db_chip { + u16 chipid; + const char *name; +}; + +/* + * This is a list of the Digital Baseband chips used in the U300 platform. + */ +static struct db_chip db_chips[] __initdata = { + { + .chipid = 0xb800, + .name = "DB3000", + }, + { + .chipid = 0xc000, + .name = "DB3100", + }, + { + .chipid = 0xc800, + .name = "DB3150", + }, + { + .chipid = 0xd800, + .name = "DB3200", + }, + { + .chipid = 0xe000, + .name = "DB3250", + }, + { + .chipid = 0xe800, + .name = "DB3210", + }, + { + .chipid = 0xf000, + .name = "DB3350 P1x", + }, + { + .chipid = 0xf100, + .name = "DB3350 P2x", + }, + { + .chipid = 0x0000, /* List terminator */ + .name = NULL, + } +}; + +static void u300_init_check_chip(void) +{ + + u16 val; + struct db_chip *chip; + const char *chipname; + const char unknown[] = "UNKNOWN"; + + /* Read out and print chip ID */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CIDR); + /* This is in funky bigendian order... */ + val = (val & 0xFFU) << 8 | (val >> 8); + chip = db_chips; + chipname = unknown; + + for ( ; chip->chipid; chip++) { + if (chip->chipid == (val & 0xFF00U)) { + chipname = chip->name; + break; + } + } + printk(KERN_INFO "Initializing U300 system on %s baseband chip " \ + "(chip ID 0x%04x)\n", chipname, val); + +#ifdef CONFIG_MACH_U300_BS26 + if ((val & 0xFF00U) != 0xc800) { + printk(KERN_ERR "Platform configured for BS25/BS26 " \ + "with DB3150 but %s detected, expect problems!", + chipname); + } +#endif +#ifdef CONFIG_MACH_U300_BS330 + if ((val & 0xFF00U) != 0xd800) { + printk(KERN_ERR "Platform configured for BS330 " \ + "with DB3200 but %s detected, expect problems!", + chipname); + } +#endif +#ifdef CONFIG_MACH_U300_BS335 + if ((val & 0xFF00U) != 0xf000 && (val & 0xFF00U) != 0xf100) { + printk(KERN_ERR "Platform configured for BS365 " \ + " with DB3350 but %s detected, expect problems!", + chipname); + } +#endif +#ifdef CONFIG_MACH_U300_BS365 + if ((val & 0xFF00U) != 0xe800) { + printk(KERN_ERR "Platform configured for BS365 " \ + "with DB3210 but %s detected, expect problems!", + chipname); + } +#endif + + +} + +/* + * Some devices and their resources require reserved physical memory from + * the end of the available RAM. This function traverses the list of devices + * and assigns actual adresses to these. + */ +static void __init u300_assign_physmem(void) +{ + unsigned long curr_start = __pa(high_memory); + int i, j; + + for (i = 0; i < ARRAY_SIZE(platform_devs); i++) { + for (j = 0; j < platform_devs[i]->num_resources; j++) { + struct resource *const res = + &platform_devs[i]->resource[j]; + + if (IORESOURCE_MEM == res->flags && + 0 == res->start) { + res->start = curr_start; + res->end += curr_start; + curr_start += (res->end - res->start + 1); + + printk(KERN_INFO "core.c: Mapping RAM " \ + "%#x-%#x to device %s:%s\n", + res->start, res->end, + platform_devs[i]->name, res->name); + } + } + } +} + +void __init u300_init_devices(void) +{ + int i; + u16 val; + + /* Check what platform we run and print some status information */ + u300_init_check_chip(); + + /* Set system to run at PLL208, max performance, a known state. */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_CCR); + val &= ~U300_SYSCON_CCR_CLKING_PERFORMANCE_MASK; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_CCR); + /* Wait for the PLL208 to lock if not locked in yet */ + while (!(readw(U300_SYSCON_VBASE + U300_SYSCON_CSR) & + U300_SYSCON_CSR_PLL208_LOCK_IND)); + + /* Register the AMBA devices in the AMBA bus abstraction layer */ + u300_clock_primecells(); + for (i = 0; i < ARRAY_SIZE(amba_devs); i++) { + struct amba_device *d = amba_devs[i]; + amba_device_register(d, &iomem_resource); + } + u300_unclock_primecells(); + + u300_assign_physmem(); + + /* Register the platform devices */ + platform_add_devices(platform_devs, ARRAY_SIZE(platform_devs)); + +#ifndef CONFIG_MACH_U300_SEMI_IS_SHARED + /* + * Enable SEMI self refresh. Self-refresh of the SDRAM is entered when + * both subsystems are requesting this mode. + * If we not share the Acc SDRAM, this is never the case. Therefore + * enable it here from the App side. + */ + val = readw(U300_SYSCON_VBASE + U300_SYSCON_SMCR) | + U300_SYSCON_SMCR_SEMI_SREFREQ_ENABLE; + writew(val, U300_SYSCON_VBASE + U300_SYSCON_SMCR); +#endif /* CONFIG_MACH_U300_SEMI_IS_SHARED */ +} + +static int core_module_init(void) +{ + /* + * This needs to be initialized later: it needs the input framework + * to be initialized first. + */ + return mmc_init(&mmcsd_device); +} +module_init(core_module_init); diff --git a/arch/arm/mach-u300/include/mach/debug-macro.S b/arch/arm/mach-u300/include/mach/debug-macro.S new file mode 100644 index 0000000..f3a1cbb --- /dev/null +++ b/arch/arm/mach-u300/include/mach/debug-macro.S @@ -0,0 +1,22 @@ +/* + * + * arch-arm/mach-u300/include/mach/debug-macro.S + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Debugging macro include header. + * Author: Linus Walleij + */ +#include + + .macro addruart,rx + /* If we move the adress using MMU, use this. */ + mrc p15, 0, \rx, c1, c0 + tst \rx, #1 @ MMU enabled? + ldreq \rx, = U300_SLOW_PER_PHYS_BASE @ MMU off, physical address + ldrne \rx, = U300_SLOW_PER_VIRT_BASE @ MMU on, virtual address + orr \rx, \rx, #0x00003000 + .endm + +#include diff --git a/arch/arm/mach-u300/include/mach/entry-macro.S b/arch/arm/mach-u300/include/mach/entry-macro.S new file mode 100644 index 0000000..20731ae --- /dev/null +++ b/arch/arm/mach-u300/include/mach/entry-macro.S @@ -0,0 +1,40 @@ +/* + * + * arch-arm/mach-u300/include/mach/entry-macro.S + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Low-level IRQ helper macros for ST-Ericsson U300 + * Author: Linus Walleij + */ +#include +#include + + .macro disable_fiq + .endm + + .macro get_irqnr_preamble, base, tmp + .endm + + .macro arch_ret_to_user, tmp1, tmp2 + .endm + + .macro get_irqnr_and_base, irqnr, irqstat, base, tmp + ldr \base, = U300_AHB_PER_VIRT_BASE-U300_AHB_PER_PHYS_BASE+U300_INTCON0_BASE + ldr \irqstat, [\base, #VIC_IRQ_STATUS] @ get masked status + mov \irqnr, #0 + teq \irqstat, #0 + bne 1002f +1001: ldr \base, = U300_AHB_PER_VIRT_BASE-U300_AHB_PER_PHYS_BASE+U300_INTCON1_BASE + ldr \irqstat, [\base, #VIC_IRQ_STATUS] @ get masked status + mov \irqnr, #32 + teq \irqstat, #0 + beq 1003f +1002: tst \irqstat, #1 + bne 1003f + add \irqnr, \irqnr, #1 + movs \irqstat, \irqstat, lsr #1 + bne 1002b +1003: /* EQ will be set if no irqs pending */ + .endm diff --git a/arch/arm/mach-u300/include/mach/memory.h b/arch/arm/mach-u300/include/mach/memory.h new file mode 100644 index 0000000..bf134bc --- /dev/null +++ b/arch/arm/mach-u300/include/mach/memory.h @@ -0,0 +1,42 @@ +/* + * + * arch/arm/mach-u300/include/mach/memory.h + * + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Memory virtual/physical mapping constants. + * Author: Linus Walleij + * Author: Jonas Aaberg + */ + +#ifndef __MACH_MEMORY_H +#define __MACH_MEMORY_H + +#ifdef CONFIG_MACH_U300_DUAL_RAM + +#define PHYS_OFFSET UL(0x48000000) +#define BOOT_PARAMS_OFFSET (PHYS_OFFSET + 0x100) + +#else + +#ifdef CONFIG_MACH_U300_2MB_ALIGNMENT_FIX +#define PHYS_OFFSET (0x28000000 + \ + (CONFIG_MACH_U300_ACCESS_MEM_SIZE - \ + (CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1))*1024*1024) +#else +#define PHYS_OFFSET (0x28000000 + \ + (CONFIG_MACH_U300_ACCESS_MEM_SIZE + \ + (CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1))*1024*1024) +#endif +#define BOOT_PARAMS_OFFSET (0x28000000 + \ + (CONFIG_MACH_U300_ACCESS_MEM_SIZE + \ + (CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1))*1024*1024 + 0x100) +#endif + +/* + * We enable a real big DMA buffer if need be. + */ +#define CONSISTENT_DMA_SIZE SZ_4M + +#endif diff --git a/arch/arm/mach-u300/include/mach/platform.h b/arch/arm/mach-u300/include/mach/platform.h new file mode 100644 index 0000000..77d9210 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/platform.h @@ -0,0 +1,19 @@ +/* + * + * arch/arm/mach-u300/include/mach/platform.h + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Basic platform init and mapping functions. + * Author: Linus Walleij + */ + +#ifndef __ASSEMBLY__ + +void u300_map_io(void); +void u300_init_irq(void); +void u300_init_devices(void); +extern struct sys_timer u300_timer; + +#endif diff --git a/arch/arm/mach-u300/include/mach/system.h b/arch/arm/mach-u300/include/mach/system.h new file mode 100644 index 0000000..8daf136 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/system.h @@ -0,0 +1,42 @@ +/* + * + * arch/arm/mach-u300/include/mach/system.h + * + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * System shutdown and reset functions. + * Author: Linus Walleij + */ +#include +#include +#include +#include + +/* Forward declare this function from the watchdog */ +void coh901327_watchdog_reset(void); + +static inline void arch_idle(void) +{ + cpu_do_idle(); +} + +static void arch_reset(char mode, const char *cmd) +{ + switch (mode) { + case 's': + case 'h': + printk(KERN_CRIT "RESET: shutting down/rebooting system\n"); + /* Disable interrupts */ + local_irq_disable(); +#ifdef CONFIG_COH901327_WATCHDOG + coh901327_watchdog_reset(); +#endif + break; + default: + /* Do nothing */ + break; + } + /* Wait for system do die/reset. */ + while (1); +} diff --git a/arch/arm/mach-u300/include/mach/timex.h b/arch/arm/mach-u300/include/mach/timex.h new file mode 100644 index 0000000..f233b72 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/timex.h @@ -0,0 +1,17 @@ +/* + * + * arch/arm/mach-u300/include/mach/timex.h + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Platform tick rate definition. + * Author: Linus Walleij + */ +#ifndef __MACH_TIMEX_H +#define __MACH_TIMEX_H + +/* This is for the APP OS GP1 (General Purpose 1) timer */ +#define CLOCK_TICK_RATE 1000000 + +#endif diff --git a/arch/arm/mach-u300/include/mach/uncompress.h b/arch/arm/mach-u300/include/mach/uncompress.h new file mode 100644 index 0000000..29acb71 --- /dev/null +++ b/arch/arm/mach-u300/include/mach/uncompress.h @@ -0,0 +1,46 @@ +/* + * arch/arm/mach-u300/include/mach/uncompress.h + * + * Copyright (C) 2003 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#define AMBA_UART_DR (*(volatile unsigned char *)0xc0013000) +#define AMBA_UART_LCRH (*(volatile unsigned char *)0xc001302C) +#define AMBA_UART_CR (*(volatile unsigned char *)0xc0013030) +#define AMBA_UART_FR (*(volatile unsigned char *)0xc0013018) + +/* + * This does not append a newline + */ +static inline void putc(int c) +{ + while (AMBA_UART_FR & (1 << 5)) + barrier(); + + AMBA_UART_DR = c; +} + +static inline void flush(void) +{ + while (AMBA_UART_FR & (1 << 3)) + barrier(); +} + +/* + * nothing to do + */ +#define arch_decomp_setup() +#define arch_decomp_wdog() diff --git a/arch/arm/mach-u300/include/mach/vmalloc.h b/arch/arm/mach-u300/include/mach/vmalloc.h new file mode 100644 index 0000000..b00c51a --- /dev/null +++ b/arch/arm/mach-u300/include/mach/vmalloc.h @@ -0,0 +1,12 @@ +/* + * + * arch/arm/mach-u300/include/mach/vmalloc.h + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Virtual memory allocations + * End must be above the I/O registers and on an even 2MiB boundary. + * Author: Linus Walleij + */ +#define VMALLOC_END 0xfe800000 diff --git a/arch/arm/mach-u300/mmc.c b/arch/arm/mach-u300/mmc.c new file mode 100644 index 0000000..3138d39 --- /dev/null +++ b/arch/arm/mach-u300/mmc.c @@ -0,0 +1,216 @@ +/* + * + * arch/arm/mach-u300/mmc.c + * + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * + * Author: Linus Walleij + * Author: Johan Lundin + * Author: Jonas Aaberg + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "mmc.h" + +struct mmci_card_event { + struct input_dev *mmc_input; + int mmc_inserted; + struct work_struct workq; + struct mmc_platform_data mmc0_plat_data; +}; + +static unsigned int mmc_status(struct device *dev) +{ + struct mmci_card_event *mmci_card = container_of( + dev->platform_data, + struct mmci_card_event, mmc0_plat_data); + + return mmci_card->mmc_inserted; +} + +/* + * Here follows a large chunk of code which will only be enabled if you + * have both the AB3100 chip mounted and the MMC subsystem activated. + */ + +static u32 mmc_translate_vdd(struct device *dev, unsigned int voltage) +{ + int v; + + /* + * MMC Spec: + * bit 7: 1.70 - 1.95V + * bit 8 - 14: 2.0 - 2.6V + * bit 15 - 23: 2.7 - 3.6V + * + * ab3100 voltages: + * 000 - 2.85V + * 001 - 2.75V + * 010 - 1.8V + * 011 - 1.5V + */ + switch (voltage) { + case 8: + v = 3; + break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + v = 1; + break; + case 16: + v = 1; + break; + case 17: + case 18: + case 19: + case 20: + case 21: + case 22: + case 23: + case 24: + v = 0; + break; + default: + v = 0; + break; + } + + /* PL180 voltage register bits */ + return v << 2; +} + + + +static int mmci_callback(void *data) +{ + struct mmci_card_event *mmci_card = data; + + disable_irq_on_gpio_pin(U300_GPIO_PIN_MMC_CD); + schedule_work(&mmci_card->workq); + + return 0; +} + + +static ssize_t gpio_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mmci_card_event *mmci_card = container_of( + dev->platform_data, + struct mmci_card_event, mmc0_plat_data); + + + return sprintf(buf, "%d\n", !mmci_card->mmc_inserted); +} + +static DEVICE_ATTR(mmc_inserted, S_IRUGO, gpio_show, NULL); + +static void _mmci_callback(struct work_struct *ws) +{ + + struct mmci_card_event *mmci_card = container_of( + ws, + struct mmci_card_event, workq); + + mdelay(20); + + mmci_card->mmc_inserted = !!gpio_get_value(U300_GPIO_PIN_MMC_CD); + + input_report_switch(mmci_card->mmc_input, KEY_INSERT, + !mmci_card->mmc_inserted); + input_sync(mmci_card->mmc_input); + + pr_debug("MMC/SD card was %s\n", + mmci_card->mmc_inserted ? "removed" : "inserted"); + + enable_irq_on_gpio_pin(U300_GPIO_PIN_MMC_CD, !mmci_card->mmc_inserted); +} + +int __devinit mmc_init(struct amba_device *adev) +{ + struct mmci_card_event *mmci_card; + struct device *mmcsd_device = &adev->dev; + int ret = 0; + + mmci_card = kzalloc(sizeof(struct mmci_card_event), GFP_KERNEL); + if (!mmci_card) + return -ENOMEM; + + /* Nominally 2.85V on our platform */ + mmci_card->mmc0_plat_data.ocr_mask = MMC_VDD_28_29; + mmci_card->mmc0_plat_data.translate_vdd = mmc_translate_vdd; + mmci_card->mmc0_plat_data.status = mmc_status; + + mmcsd_device->platform_data = (void *) &mmci_card->mmc0_plat_data; + + INIT_WORK(&mmci_card->workq, _mmci_callback); + + ret = gpio_request(U300_GPIO_PIN_MMC_CD, "MMC card detection"); + if (ret) { + printk(KERN_CRIT "Could not allocate MMC card detection " \ + "GPIO pin\n"); + goto out; + } + + ret = gpio_direction_input(U300_GPIO_PIN_MMC_CD); + if (ret) { + printk(KERN_CRIT "Invalid GPIO pin requested\n"); + goto out; + } + + ret = sysfs_create_file(&mmcsd_device->kobj, + &dev_attr_mmc_inserted.attr); + if (ret) + goto out; + + mmci_card->mmc_input = input_allocate_device(); + if (!mmci_card->mmc_input) { + printk(KERN_CRIT "Could not allocate MMC input device\n"); + return -ENOMEM; + } + + mmci_card->mmc_input->name = "MMC insert notification"; + mmci_card->mmc_input->id.bustype = BUS_HOST; + mmci_card->mmc_input->id.vendor = 0; + mmci_card->mmc_input->id.product = 0; + mmci_card->mmc_input->id.version = 0x0100; + mmci_card->mmc_input->dev.parent = mmcsd_device; + input_set_capability(mmci_card->mmc_input, EV_SW, KEY_INSERT); + + /* + * Since this must always be compiled into the kernel, this input + * is never unregistered or free:ed. + */ + ret = input_register_device(mmci_card->mmc_input); + if (ret) { + input_free_device(mmci_card->mmc_input); + goto out; + } + + input_set_drvdata(mmci_card->mmc_input, mmci_card); + + ret = gpio_register_callback(U300_GPIO_PIN_MMC_CD, mmci_callback, + mmci_card); + + schedule_work(&mmci_card->workq); + + printk(KERN_INFO "Registered MMC insert/remove notification\n"); +out: + return ret; +} diff --git a/arch/arm/mach-u300/mmc.h b/arch/arm/mach-u300/mmc.h new file mode 100644 index 0000000..92b8512 --- /dev/null +++ b/arch/arm/mach-u300/mmc.h @@ -0,0 +1,18 @@ +/* + * + * arch/arm/mach-u300/mmc.h + * + * + * Copyright (C) 2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * + * Author: Jonas Aaberg + */ +#ifndef MMC_H +#define MMC_H + +#include + +int __devinit mmc_init(struct amba_device *adev); + +#endif diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c new file mode 100644 index 0000000..57b5351 --- /dev/null +++ b/arch/arm/mach-u300/timer.c @@ -0,0 +1,422 @@ +/* + * + * arch/arm/mach-u300/timer.c + * + * + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Timer COH 901 328, runs the OS timer interrupt. + * Author: Linus Walleij + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +/* Generic stuff */ +#include +#include +#include + +#include "clock.h" + +/* + * APP side special timer registers + * This timer contains four timers which can fire an interrupt each. + * OS (operating system) timer @ 32768 Hz + * DD (device driver) timer @ 1 kHz + * GP1 (general purpose 1) timer @ 1MHz + * GP2 (general purpose 2) timer @ 1MHz + */ + +/* Reset OS Timer 32bit (-/W) */ +#define U300_TIMER_APP_ROST (0x0000) +#define U300_TIMER_APP_ROST_TIMER_RESET (0x00000000) +/* Enable OS Timer 32bit (-/W) */ +#define U300_TIMER_APP_EOST (0x0004) +#define U300_TIMER_APP_EOST_TIMER_ENABLE (0x00000000) +/* Disable OS Timer 32bit (-/W) */ +#define U300_TIMER_APP_DOST (0x0008) +#define U300_TIMER_APP_DOST_TIMER_DISABLE (0x00000000) +/* OS Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SOSTM (0x000c) +#define U300_TIMER_APP_SOSTM_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SOSTM_MODE_ONE_SHOT (0x00000001) +/* OS Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_OSTS (0x0010) +#define U300_TIMER_APP_OSTS_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_OSTS_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_OSTS_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_OSTS_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_OSTS_MODE_MASK (0x00000020) +#define U300_TIMER_APP_OSTS_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_OSTS_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_OSTS_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_OSTS_IRQ_PENDING_IND (0x00000080) +/* OS Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_OSTCC (0x0014) +/* OS Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_OSTTC (0x0018) +/* OS Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_OSTIE (0x001c) +#define U300_TIMER_APP_OSTIE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_OSTIE_IRQ_ENABLE (0x00000001) +/* OS Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_OSTIA (0x0020) +#define U300_TIMER_APP_OSTIA_IRQ_ACK (0x00000080) + +/* Reset DD Timer 32bit (-/W) */ +#define U300_TIMER_APP_RDDT (0x0040) +#define U300_TIMER_APP_RDDT_TIMER_RESET (0x00000000) +/* Enable DD Timer 32bit (-/W) */ +#define U300_TIMER_APP_EDDT (0x0044) +#define U300_TIMER_APP_EDDT_TIMER_ENABLE (0x00000000) +/* Disable DD Timer 32bit (-/W) */ +#define U300_TIMER_APP_DDDT (0x0048) +#define U300_TIMER_APP_DDDT_TIMER_DISABLE (0x00000000) +/* DD Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SDDTM (0x004c) +#define U300_TIMER_APP_SDDTM_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SDDTM_MODE_ONE_SHOT (0x00000001) +/* DD Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_DDTS (0x0050) +#define U300_TIMER_APP_DDTS_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_DDTS_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_DDTS_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_DDTS_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_DDTS_MODE_MASK (0x00000020) +#define U300_TIMER_APP_DDTS_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_DDTS_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_DDTS_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_DDTS_IRQ_PENDING_IND (0x00000080) +/* DD Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_DDTCC (0x0054) +/* DD Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_DDTTC (0x0058) +/* DD Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_DDTIE (0x005c) +#define U300_TIMER_APP_DDTIE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_DDTIE_IRQ_ENABLE (0x00000001) +/* DD Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_DDTIA (0x0060) +#define U300_TIMER_APP_DDTIA_IRQ_ACK (0x00000080) + +/* Reset GP1 Timer 32bit (-/W) */ +#define U300_TIMER_APP_RGPT1 (0x0080) +#define U300_TIMER_APP_RGPT1_TIMER_RESET (0x00000000) +/* Enable GP1 Timer 32bit (-/W) */ +#define U300_TIMER_APP_EGPT1 (0x0084) +#define U300_TIMER_APP_EGPT1_TIMER_ENABLE (0x00000000) +/* Disable GP1 Timer 32bit (-/W) */ +#define U300_TIMER_APP_DGPT1 (0x0088) +#define U300_TIMER_APP_DGPT1_TIMER_DISABLE (0x00000000) +/* GP1 Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SGPT1M (0x008c) +#define U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT (0x00000001) +/* GP1 Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT1S (0x0090) +#define U300_TIMER_APP_GPT1S_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_GPT1S_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_GPT1S_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_GPT1S_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_GPT1S_MODE_MASK (0x00000020) +#define U300_TIMER_APP_GPT1S_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_GPT1S_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_GPT1S_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_GPT1S_IRQ_PENDING_IND (0x00000080) +/* GP1 Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT1CC (0x0094) +/* GP1 Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_GPT1TC (0x0098) +/* GP1 Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT1IE (0x009c) +#define U300_TIMER_APP_GPT1IE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_GPT1IE_IRQ_ENABLE (0x00000001) +/* GP1 Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT1IA (0x00a0) +#define U300_TIMER_APP_GPT1IA_IRQ_ACK (0x00000080) + +/* Reset GP2 Timer 32bit (-/W) */ +#define U300_TIMER_APP_RGPT2 (0x00c0) +#define U300_TIMER_APP_RGPT2_TIMER_RESET (0x00000000) +/* Enable GP2 Timer 32bit (-/W) */ +#define U300_TIMER_APP_EGPT2 (0x00c4) +#define U300_TIMER_APP_EGPT2_TIMER_ENABLE (0x00000000) +/* Disable GP2 Timer 32bit (-/W) */ +#define U300_TIMER_APP_DGPT2 (0x00c8) +#define U300_TIMER_APP_DGPT2_TIMER_DISABLE (0x00000000) +/* GP2 Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SGPT2M (0x00cc) +#define U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SGPT2M_MODE_ONE_SHOT (0x00000001) +/* GP2 Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT2S (0x00d0) +#define U300_TIMER_APP_GPT2S_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_GPT2S_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_GPT2S_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_GPT2S_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_GPT2S_MODE_MASK (0x00000020) +#define U300_TIMER_APP_GPT2S_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_GPT2S_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_GPT2S_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_GPT2S_IRQ_PENDING_IND (0x00000080) +/* GP2 Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT2CC (0x00d4) +/* GP2 Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_GPT2TC (0x00d8) +/* GP2 Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT2IE (0x00dc) +#define U300_TIMER_APP_GPT2IE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_GPT2IE_IRQ_ENABLE (0x00000001) +/* GP2 Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT2IA (0x00e0) +#define U300_TIMER_APP_GPT2IA_IRQ_ACK (0x00000080) + +/* Clock request control register - all four timers */ +#define U300_TIMER_APP_CRC (0x100) +#define U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE (0x00000001) + +#define TICKS_PER_JIFFY ((CLOCK_TICK_RATE + (HZ/2)) / HZ) +#define US_PER_TICK ((1000000 + (HZ/2)) / HZ) + +/* + * The u300_set_mode() function is always called first, if we + * have oneshot timer active, the oneshot scheduling function + * u300_set_next_event() is called immediately after. + */ +static void u300_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + /* Disable interrupts on GPT1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE); + /* Disable GP1 while we're reprogramming it. */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1); + /* + * Set the periodic mode to a certain number of ticks per + * jiffy. + */ + writel(TICKS_PER_JIFFY, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1TC); + /* + * Set continuous mode, so the timer keeps triggering + * interrupts. + */ + writel(U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS, + U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT1M); + /* Enable timer interrupts */ + writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE); + /* Then enable the OS timer again */ + writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT1); + break; + case CLOCK_EVT_MODE_ONESHOT: + /* Just break; here? */ + /* + * The actual event will be programmed by the next event hook, + * so we just set a dummy value somewhere at the end of the + * universe here. + */ + /* Disable interrupts on GPT1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE); + /* Disable GP1 while we're reprogramming it. */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1); + /* + * Expire far in the future, u300_set_next_event() will be + * called soon... + */ + writel(0xFFFFFFFF, U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1TC); + /* We run one shot per tick here! */ + writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT, + U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT1M); + /* Enable interrupts for this timer */ + writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE); + /* Enable timer */ + writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + /* Disable interrupts on GP1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE); + /* Disable GP1 */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1); + break; + case CLOCK_EVT_MODE_RESUME: + /* Ignore this call */ + break; + } +} + +/* + * The app timer in one shot mode obviously has to be reprogrammed + * in EXACTLY this sequence to work properly. Do NOT try to e.g. replace + * the interrupt disable + timer disable commands with a reset command, + * it will fail miserably. Apparently (and I found this the hard way) + * the timer is very sensitive to the instruction order, though you don't + * get that impression from the data sheet. + */ +static int u300_set_next_event(unsigned long cycles, + struct clock_event_device *evt) + +{ + /* Disable interrupts on GPT1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE); + /* Disable GP1 while we're reprogramming it. */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_DGPT1); + /* Reset the General Purpose timer 1. */ + writel(U300_TIMER_APP_RGPT1_TIMER_RESET, + U300_TIMER_APP_VBASE + U300_TIMER_APP_RGPT1); + /* IRQ in n * cycles */ + writel(cycles, U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1TC); + /* + * We run one shot per tick here! (This is necessary to reconfigure, + * the timer will tilt if you don't!) + */ + writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT, + U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT1M); + /* Enable timer interrupts */ + writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IE); + /* Then enable the OS timer again */ + writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT1); + return 0; +} + + +/* Use general purpose timer 1 as clock event */ +static struct clock_event_device clockevent_u300_1mhz = { + .name = "GPT1", + .rating = 300, /* Reasonably fast and accurate clock event */ + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + /* 22 calculated using the algorithm in arch/mips/kernel/time.c */ + .shift = 22, + .set_next_event = u300_set_next_event, + .set_mode = u300_set_mode, +}; + +/* Clock event timer interrupt handler */ +static irqreturn_t u300_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = &clockevent_u300_1mhz; + /* ACK/Clear timer IRQ for the APP GPT1 Timer */ + writel(U300_TIMER_APP_GPT1IA_IRQ_ACK, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT1IA); + evt->event_handler(evt); + return IRQ_HANDLED; +} + +static struct irqaction u300_timer_irq = { + .name = "U300 Timer Tick", + .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .handler = u300_timer_interrupt, +}; + +/* Use general purpose timer 2 as clock source */ +static cycle_t u300_get_cycles(void) +{ + return (cycles_t) readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC); +} + +static struct clocksource clocksource_u300_1mhz = { + .name = "GPT2", + .rating = 300, /* Reasonably fast and accurate clock source */ + .read = u300_get_cycles, + .mask = CLOCKSOURCE_MASK(32), /* 32 bits */ + /* 22 calculated using the algorithm in arch/mips/kernel/time.c */ + .shift = 22, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + + +/* + * This sets up the system timers, clock source and clock event. + */ +static void __init u300_timer_init(void) +{ + u300_enable_timer_clock(); + /* + * Disable the "OS" and "DD" timers - these are designed for Symbian! + * Example usage in cnh1601578 cpu subsystem pd_timer_app.c + */ + writel(U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_CRC); + writel(U300_TIMER_APP_ROST_TIMER_RESET, + U300_TIMER_APP_VBASE + U300_TIMER_APP_ROST); + writel(U300_TIMER_APP_DOST_TIMER_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_DOST); + writel(U300_TIMER_APP_RDDT_TIMER_RESET, + U300_TIMER_APP_VBASE + U300_TIMER_APP_RDDT); + writel(U300_TIMER_APP_DDDT_TIMER_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_DDDT); + + /* Reset the General Purpose timer 1. */ + writel(U300_TIMER_APP_RGPT1_TIMER_RESET, + U300_TIMER_APP_VBASE + U300_TIMER_APP_RGPT1); + + /* Set up the IRQ handler */ + setup_irq(IRQ_U300_TIMER_APP_GP1, &u300_timer_irq); + + /* Reset the General Purpose timer 2 */ + writel(U300_TIMER_APP_RGPT2_TIMER_RESET, + U300_TIMER_APP_VBASE + U300_TIMER_APP_RGPT2); + /* Set this timer to run around forever */ + writel(0xFFFFFFFFU, U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2TC); + /* Set continuous mode so it wraps around */ + writel(U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS, + U300_TIMER_APP_VBASE + U300_TIMER_APP_SGPT2M); + /* Disable timer interrupts */ + writel(U300_TIMER_APP_GPT2IE_IRQ_DISABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2IE); + /* Then enable the GP2 timer to use as a free running us counter */ + writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE, + U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2); + + /* This is a pure microsecond clock source */ + clocksource_u300_1mhz.mult = + clocksource_khz2mult(1000, clocksource_u300_1mhz.shift); + if (clocksource_register(&clocksource_u300_1mhz)) + printk(KERN_ERR "timer: failed to initialize clock " + "source %s\n", clocksource_u300_1mhz.name); + + clockevent_u300_1mhz.mult = + div_sc(1000000, NSEC_PER_SEC, clockevent_u300_1mhz.shift); + /* 32bit counter, so 32bits delta is max */ + clockevent_u300_1mhz.max_delta_ns = + clockevent_delta2ns(0xffffffff, &clockevent_u300_1mhz); + /* This timer is slow enough to set for 1 cycle == 1 MHz */ + clockevent_u300_1mhz.min_delta_ns = + clockevent_delta2ns(1, &clockevent_u300_1mhz); + clockevent_u300_1mhz.cpumask = cpumask_of(0); + clockevents_register_device(&clockevent_u300_1mhz); + /* + * TODO: init and register the rest of the timers too, they can be + * used by hrtimers! + */ +} + +/* + * Very simple system timer that only register the clock event and + * clock source. + */ +struct sys_timer u300_timer = { + .init = u300_timer_init, +}; diff --git a/arch/arm/mach-u300/u300.c b/arch/arm/mach-u300/u300.c new file mode 100644 index 0000000..d2a0b88 --- /dev/null +++ b/arch/arm/mach-u300/u300.c @@ -0,0 +1,55 @@ +/* + * + * arch/arm/mach-u300/u300.c + * + * + * Copyright (C) 2006-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Platform machine definition. + * Author: Linus Walleij + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void __init u300_init_machine(void) +{ + u300_init_devices(); +} + +#ifdef CONFIG_MACH_U300_BS2X +#define MACH_U300_STRING "Ericsson AB U300 S25/S26/B25/B26 Prototype Board" +#endif + +#ifdef CONFIG_MACH_U300_BS330 +#define MACH_U300_STRING "Ericsson AB U330 S330/B330 Prototype Board" +#endif + +#ifdef CONFIG_MACH_U300_BS335 +#define MACH_U300_STRING "Ericsson AB U335 S335/B335 Prototype Board" +#endif + +#ifdef CONFIG_MACH_U300_BS365 +#define MACH_U300_STRING "Ericsson AB U365 S365/B365 Prototype Board" +#endif + +MACHINE_START(U300, MACH_U300_STRING) + /* Maintainer: Linus Walleij */ + .phys_io = U300_AHB_PER_PHYS_BASE, + .io_pg_offst = ((U300_AHB_PER_VIRT_BASE) >> 18) & 0xfffc, + .boot_params = BOOT_PARAMS_OFFSET, + .map_io = u300_map_io, + .init_irq = u300_init_irq, + .timer = &u300_timer, + .init_machine = u300_init_machine, +MACHINE_END -- cgit v0.10.2 From d98aac7592114241f378bc8d5b3e424cced7ded2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Apr 2009 10:21:46 +0100 Subject: [ARM] 5480/1: U300-v5 integrate into the ARM architecture This hooks the U300 support into Kbuild and makes a small hook in mmu.c for supporting an odd memory alignment with shared memory on these systems. This is rebased to RMK:s GIT HEAD. This patch tries to add the Kconfig option in alphabetic order by option text and the Makefile entry after config symbol. Signed-off-by: Linus Walleij Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a930e5c..44456e1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -594,6 +594,20 @@ config ARCH_LH7A40X core with a wide array of integrated devices for hand-held and low-power applications. +config ARCH_U300 + bool "ST-Ericsson U300 Series" + depends on MMU + select CPU_ARM926T + select ARM_AMBA + select ARM_VIC + select GENERIC_TIME + select GENERIC_CLOCKEVENTS + select HAVE_CLK + select COMMON_CLKDEV + select GENERIC_GPIO + help + Support for ST-Ericsson U300 series mobile platforms. + config ARCH_DAVINCI bool "TI DaVinci" select CPU_ARM926T @@ -705,6 +719,8 @@ source "arch/arm/mach-ks8695/Kconfig" source "arch/arm/mach-msm/Kconfig" +source "arch/arm/mach-u300/Kconfig" + source "arch/arm/mach-w90x900/Kconfig" # Definitions to make life easier diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 885a837..20084c5 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -149,6 +149,7 @@ machine-$(CONFIG_ARCH_S3C24A0) := s3c24a0 machine-$(CONFIG_ARCH_S3C64XX) := s3c6400 s3c6410 machine-$(CONFIG_ARCH_SA1100) := sa1100 machine-$(CONFIG_ARCH_SHARK) := shark +machine-$(CONFIG_ARCH_U300) := u300 machine-$(CONFIG_ARCH_VERSATILE) := versatile machine-$(CONFIG_ARCH_W90X900) := w90x900 machine-$(CONFIG_FOOTBRIDGE) := footbridge diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e6344ec..39fca4e 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -839,6 +839,20 @@ void __init reserve_node_zero(pg_data_t *pgdat) reserve_bootmem_node(pgdat, 0xa0200000, 0x1000, BOOTMEM_EXCLUSIVE); + /* + * U300 - This platform family can share physical memory + * between two ARM cpus, one running Linux and the other + * running another OS. + */ + if (machine_is_u300()) { +#ifdef CONFIG_MACH_U300_SINGLE_RAM +#if ((CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1) == 1) && \ + CONFIG_MACH_U300_2MB_ALIGNMENT_FIX + res_size = 0x00100000; +#endif +#endif + } + #ifdef CONFIG_SA1111 /* * Because of the SA1111 DMA bug, we want to preserve our -- cgit v0.10.2 From 6d0485a99366d4e0e7e725f14995c74cb7ca4499 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Tue, 31 Mar 2009 17:13:15 +0100 Subject: [ARM] 5438/1: AT91: manage clock by functionality instead of CPUs In clock.c file the clock management is grouped by cpu with cpu_is_xxx() function. This lead to some kind of difficulties to read this file and maintainability issues as the number of AT91 cpus & PLLs/clocks is growing. In this patch, I try to group clock functionality together and match cpus with this functionality set. An update to at91_pmc.h is needed to cover some new PMC possibilities (and some update in comments). Signed-off-by: Nicolas Ferre Acked-by: Andrew Victor Signed-off-by: Russell King diff --git a/arch/arm/mach-at91/clock.c b/arch/arm/mach-at91/clock.c index e434510..bac578f 100644 --- a/arch/arm/mach-at91/clock.c +++ b/arch/arm/mach-at91/clock.c @@ -43,6 +43,25 @@ #define clk_is_sys(x) ((x)->type & CLK_TYPE_SYSTEM) +/* + * Chips have some kind of clocks : group them by functionality + */ +#define cpu_has_utmi() ( cpu_is_at91cap9() \ + || cpu_is_at91sam9rl()) + +#define cpu_has_800M_plla() (cpu_is_at91sam9g20()) + +#define cpu_has_pllb() (!cpu_is_at91sam9rl()) + +#define cpu_has_upll() (0) + +/* USB host HS & FS */ +#define cpu_has_uhp() (!cpu_is_at91sam9rl()) + +/* USB device FS only */ +#define cpu_has_udpfs() (!cpu_is_at91sam9rl()) + + static LIST_HEAD(clocks); static DEFINE_SPINLOCK(clk_lock); @@ -140,7 +159,7 @@ static struct clk utmi_clk = { }; static struct clk uhpck = { .name = "uhpck", - .parent = &pllb, + /*.parent = ... we choose parent at runtime */ .mode = pmc_sys_mode, }; @@ -173,7 +192,11 @@ static struct clk __init *at91_css_to_clk(unsigned long css) case AT91_PMC_CSS_PLLA: return &plla; case AT91_PMC_CSS_PLLB: - return &pllb; + if (cpu_has_upll()) + /* CSS_PLLB == CSS_UPLL */ + return &utmi_clk; + else if (cpu_has_pllb()) + return &pllb; } return NULL; @@ -322,7 +345,7 @@ int clk_set_rate(struct clk *clk, unsigned long rate) u32 pckr; pckr = at91_sys_read(AT91_PMC_PCKR(clk->id)); - pckr &= AT91_PMC_CSS_PLLB; /* clock selection */ + pckr &= AT91_PMC_CSS; /* clock selection */ pckr |= prescale << 2; at91_sys_write(AT91_PMC_PCKR(clk->id), pckr); clk->rate_hz = actual; @@ -361,7 +384,7 @@ int clk_set_parent(struct clk *clk, struct clk *parent) } EXPORT_SYMBOL(clk_set_parent); -/* establish PCK0..PCK3 parentage and rate */ +/* establish PCK0..PCKN parentage and rate */ static void __init init_programmable_clock(struct clk *clk) { struct clk *parent; @@ -389,11 +412,13 @@ static int at91_clk_show(struct seq_file *s, void *unused) seq_printf(s, "MOR = %8x\n", at91_sys_read(AT91_CKGR_MOR)); seq_printf(s, "MCFR = %8x\n", at91_sys_read(AT91_CKGR_MCFR)); seq_printf(s, "PLLA = %8x\n", at91_sys_read(AT91_CKGR_PLLAR)); - if (!cpu_is_at91sam9rl()) + if (cpu_has_pllb()) seq_printf(s, "PLLB = %8x\n", at91_sys_read(AT91_CKGR_PLLBR)); - if (cpu_is_at91cap9() || cpu_is_at91sam9rl()) + if (cpu_has_utmi()) seq_printf(s, "UCKR = %8x\n", uckr = at91_sys_read(AT91_CKGR_UCKR)); seq_printf(s, "MCKR = %8x\n", at91_sys_read(AT91_PMC_MCKR)); + if (cpu_has_upll()) + seq_printf(s, "USB = %8x\n", at91_sys_read(AT91_PMC_USB)); seq_printf(s, "SR = %8x\n", sr = at91_sys_read(AT91_PMC_SR)); seq_printf(s, "\n"); @@ -554,16 +579,60 @@ static struct clk *const standard_pmc_clocks[] __initdata = { &clk32k, &main_clk, &plla, - &pllb, - - /* PLLB children (USB) */ - &udpck, - &uhpck, /* MCK */ &mck }; +/* PLLB generated USB full speed clock init */ +static void __init at91_pllb_usbfs_clock_init(unsigned long main_clock) +{ + /* + * USB clock init: choose 48 MHz PLLB value, + * disable 48MHz clock during usb peripheral suspend. + * + * REVISIT: assumes MCK doesn't derive from PLLB! + */ + uhpck.parent = &pllb; + + at91_pllb_usb_init = at91_pll_calc(main_clock, 48000000 * 2) | AT91_PMC_USB96M; + pllb.rate_hz = at91_pll_rate(&pllb, main_clock, at91_pllb_usb_init); + if (cpu_is_at91rm9200()) { + uhpck.pmc_mask = AT91RM9200_PMC_UHP; + udpck.pmc_mask = AT91RM9200_PMC_UDP; + at91_sys_write(AT91_PMC_SCER, AT91RM9200_PMC_MCKUDP); + } else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) { + uhpck.pmc_mask = AT91SAM926x_PMC_UHP; + udpck.pmc_mask = AT91SAM926x_PMC_UDP; + } else if (cpu_is_at91cap9()) { + uhpck.pmc_mask = AT91CAP9_PMC_UHP; + } + at91_sys_write(AT91_CKGR_PLLBR, 0); + + udpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init); + uhpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init); +} + +/* UPLL generated USB full speed clock init */ +static void __init at91_upll_usbfs_clock_init(unsigned long main_clock) +{ + /* + * USB clock init: choose 480 MHz from UPLL, + */ + unsigned int usbr = AT91_PMC_USBS_UPLL; + + /* Setup divider by 10 to reach 48 MHz */ + usbr |= ((10 - 1) << 8) & AT91_PMC_OHCIUSBDIV; + + at91_sys_write(AT91_PMC_USB, usbr); + + /* Now set uhpck values */ + uhpck.parent = &utmi_clk; + uhpck.pmc_mask = AT91SAM926x_PMC_UHP; + uhpck.rate_hz = utmi_clk.parent->rate_hz; + uhpck.rate_hz /= 1 + ((at91_sys_read(AT91_PMC_USB) & AT91_PMC_OHCIUSBDIV) >> 8); +} + int __init at91_clock_init(unsigned long main_clock) { unsigned tmp, freq, mckr; @@ -585,43 +654,37 @@ int __init at91_clock_init(unsigned long main_clock) /* report if PLLA is more than mildly overclocked */ plla.rate_hz = at91_pll_rate(&plla, main_clock, at91_sys_read(AT91_CKGR_PLLAR)); - if ((!cpu_is_at91sam9g20() && plla.rate_hz > 209000000) - || (cpu_is_at91sam9g20() && plla.rate_hz > 800000000)) + if ((!cpu_has_800M_plla() && plla.rate_hz > 209000000) + || (cpu_has_800M_plla() && plla.rate_hz > 800000000)) pr_info("Clocks: PLLA overclocked, %ld MHz\n", plla.rate_hz / 1000000); - /* - * USB clock init: choose 48 MHz PLLB value, - * disable 48MHz clock during usb peripheral suspend. - * - * REVISIT: assumes MCK doesn't derive from PLLB! - */ - at91_pllb_usb_init = at91_pll_calc(main_clock, 48000000 * 2) | AT91_PMC_USB96M; - pllb.rate_hz = at91_pll_rate(&pllb, main_clock, at91_pllb_usb_init); - if (cpu_is_at91rm9200()) { - uhpck.pmc_mask = AT91RM9200_PMC_UHP; - udpck.pmc_mask = AT91RM9200_PMC_UDP; - at91_sys_write(AT91_PMC_SCER, AT91RM9200_PMC_MCKUDP); - } else if (cpu_is_at91sam9260() || cpu_is_at91sam9261() || cpu_is_at91sam9263() || cpu_is_at91sam9g20()) { - uhpck.pmc_mask = AT91SAM926x_PMC_UHP; - udpck.pmc_mask = AT91SAM926x_PMC_UDP; - } else if (cpu_is_at91cap9()) { - uhpck.pmc_mask = AT91CAP9_PMC_UHP; + + if (cpu_has_upll() && !cpu_has_pllb()) { + /* setup UTMI clock as the fourth primary clock + * (instead of pllb) */ + utmi_clk.type |= CLK_TYPE_PRIMARY; + utmi_clk.id = 3; } - at91_sys_write(AT91_CKGR_PLLBR, 0); - udpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init); - uhpck.rate_hz = at91_usb_rate(&pllb, pllb.rate_hz, at91_pllb_usb_init); /* * USB HS clock init */ - if (cpu_is_at91cap9() || cpu_is_at91sam9rl()) { + if (cpu_has_utmi()) /* * multiplier is hard-wired to 40 * (obtain the USB High Speed 480 MHz when input is 12 MHz) */ utmi_clk.rate_hz = 40 * utmi_clk.parent->rate_hz; - } + + /* + * USB FS clock init + */ + if (cpu_has_pllb()) + at91_pllb_usbfs_clock_init(main_clock); + if (cpu_has_upll()) + /* assumes that we choose UPLL for USB and not PLLA */ + at91_upll_usbfs_clock_init(main_clock); /* * MCK and CPU derive from one of those primary clocks. @@ -631,21 +694,31 @@ int __init at91_clock_init(unsigned long main_clock) mck.parent = at91_css_to_clk(mckr & AT91_PMC_CSS); freq = mck.parent->rate_hz; freq /= (1 << ((mckr & AT91_PMC_PRES) >> 2)); /* prescale */ - if (cpu_is_at91rm9200()) + if (cpu_is_at91rm9200()) { mck.rate_hz = freq / (1 + ((mckr & AT91_PMC_MDIV) >> 8)); /* mdiv */ - else if (cpu_is_at91sam9g20()) { + } else if (cpu_is_at91sam9g20()) { mck.rate_hz = (mckr & AT91_PMC_MDIV) ? freq / ((mckr & AT91_PMC_MDIV) >> 7) : freq; /* mdiv ; (x >> 7) = ((x >> 8) * 2) */ if (mckr & AT91_PMC_PDIV) freq /= 2; /* processor clock division */ - } else + } else { mck.rate_hz = freq / (1 << ((mckr & AT91_PMC_MDIV) >> 8)); /* mdiv */ + } /* Register the PMC's standard clocks */ for (i = 0; i < ARRAY_SIZE(standard_pmc_clocks); i++) list_add_tail(&standard_pmc_clocks[i]->node, &clocks); - if (cpu_is_at91cap9() || cpu_is_at91sam9rl()) + if (cpu_has_pllb()) + list_add_tail(&pllb.node, &clocks); + + if (cpu_has_uhp()) + list_add_tail(&uhpck.node, &clocks); + + if (cpu_has_udpfs()) + list_add_tail(&udpck.node, &clocks); + + if (cpu_has_utmi()) list_add_tail(&utmi_clk.node, &clocks); /* MCK and CPU clock are "always on" */ diff --git a/arch/arm/mach-at91/include/mach/at91_pmc.h b/arch/arm/mach-at91/include/mach/at91_pmc.h index 9561e33..64589ea 100644 --- a/arch/arm/mach-at91/include/mach/at91_pmc.h +++ b/arch/arm/mach-at91/include/mach/at91_pmc.h @@ -23,7 +23,7 @@ #define AT91_PMC_PCK (1 << 0) /* Processor Clock */ #define AT91RM9200_PMC_UDP (1 << 1) /* USB Devcice Port Clock [AT91RM9200 only] */ #define AT91RM9200_PMC_MCKUDP (1 << 2) /* USB Device Port Master Clock Automatic Disable on Suspend [AT91RM9200 only] */ -#define AT91CAP9_PMC_DDR (1 << 2) /* DDR Clock [AT91CAP9 revC only] */ +#define AT91CAP9_PMC_DDR (1 << 2) /* DDR Clock [CAP9 revC & some SAM9 only] */ #define AT91RM9200_PMC_UHP (1 << 4) /* USB Host Port Clock [AT91RM9200 only] */ #define AT91SAM926x_PMC_UHP (1 << 6) /* USB Host Port Clock [AT91SAM926x only] */ #define AT91CAP9_PMC_UHP (1 << 6) /* USB Host Port Clock [AT91CAP9 only] */ @@ -39,11 +39,11 @@ #define AT91_PMC_PCDR (AT91_PMC + 0x14) /* Peripheral Clock Disable Register */ #define AT91_PMC_PCSR (AT91_PMC + 0x18) /* Peripheral Clock Status Register */ -#define AT91_CKGR_UCKR (AT91_PMC + 0x1C) /* UTMI Clock Register [SAM9RL, CAP9] */ +#define AT91_CKGR_UCKR (AT91_PMC + 0x1C) /* UTMI Clock Register [some SAM9, CAP9] */ #define AT91_PMC_UPLLEN (1 << 16) /* UTMI PLL Enable */ #define AT91_PMC_UPLLCOUNT (0xf << 20) /* UTMI PLL Start-up Time */ #define AT91_PMC_BIASEN (1 << 24) /* UTMI BIAS Enable */ -#define AT91_PMC_BIASCOUNT (0xf << 28) /* UTMI PLL Start-up Time */ +#define AT91_PMC_BIASCOUNT (0xf << 28) /* UTMI BIAS Start-up Time */ #define AT91_CKGR_MOR (AT91_PMC + 0x20) /* Main Oscillator Register [not on SAM9RL] */ #define AT91_PMC_MOSCEN (1 << 0) /* Main Oscillator Enable */ @@ -72,6 +72,7 @@ #define AT91_PMC_CSS_MAIN (1 << 0) #define AT91_PMC_CSS_PLLA (2 << 0) #define AT91_PMC_CSS_PLLB (3 << 0) +#define AT91_PMC_CSS_UPLL (3 << 0) /* [some SAM9 only] */ #define AT91_PMC_PRES (7 << 2) /* Master Clock Prescaler */ #define AT91_PMC_PRES_1 (0 << 2) #define AT91_PMC_PRES_2 (1 << 2) @@ -88,12 +89,25 @@ #define AT91SAM9_PMC_MDIV_1 (0 << 8) /* [SAM9,CAP9 only] */ #define AT91SAM9_PMC_MDIV_2 (1 << 8) #define AT91SAM9_PMC_MDIV_4 (2 << 8) -#define AT91SAM9_PMC_MDIV_6 (3 << 8) +#define AT91SAM9_PMC_MDIV_6 (3 << 8) /* [some SAM9 only] */ +#define AT91SAM9_PMC_MDIV_3 (3 << 8) /* [some SAM9 only] */ #define AT91_PMC_PDIV (1 << 12) /* Processor Clock Division [some SAM9 only] */ #define AT91_PMC_PDIV_1 (0 << 12) #define AT91_PMC_PDIV_2 (1 << 12) +#define AT91_PMC_PLLADIV2 (1 << 12) /* PLLA divisor by 2 [some SAM9 only] */ +#define AT91_PMC_PLLADIV2_OFF (0 << 12) +#define AT91_PMC_PLLADIV2_ON (1 << 12) -#define AT91_PMC_PCKR(n) (AT91_PMC + 0x40 + ((n) * 4)) /* Programmable Clock 0-3 Registers */ +#define AT91_PMC_USB (AT91_PMC + 0x38) /* USB Clock Register [some SAM9 only] */ +#define AT91_PMC_USBS (0x1 << 0) /* USB OHCI Input clock selection */ +#define AT91_PMC_USBS_PLLA (0 << 0) +#define AT91_PMC_USBS_UPLL (1 << 0) +#define AT91_PMC_OHCIUSBDIV (0xF << 8) /* Divider for USB OHCI Clock */ + +#define AT91_PMC_PCKR(n) (AT91_PMC + 0x40 + ((n) * 4)) /* Programmable Clock 0-N Registers */ +#define AT91_PMC_CSSMCK (0x1 << 8) /* CSS or Master Clock Selection */ +#define AT91_PMC_CSSMCK_CSS (0 << 8) +#define AT91_PMC_CSSMCK_MCK (1 << 8) #define AT91_PMC_IER (AT91_PMC + 0x60) /* Interrupt Enable Register */ #define AT91_PMC_IDR (AT91_PMC + 0x64) /* Interrupt Disable Register */ @@ -102,7 +116,7 @@ #define AT91_PMC_LOCKA (1 << 1) /* PLLA Lock */ #define AT91_PMC_LOCKB (1 << 2) /* PLLB Lock */ #define AT91_PMC_MCKRDY (1 << 3) /* Master Clock */ -#define AT91_PMC_LOCKU (1 << 6) /* UPLL Lock [AT91CAP9 only] */ +#define AT91_PMC_LOCKU (1 << 6) /* UPLL Lock [some SAM9, AT91CAP9 only] */ #define AT91_PMC_OSCSEL (1 << 7) /* Slow Clock Oscillator [AT91CAP9 revC only] */ #define AT91_PMC_PCK0RDY (1 << 8) /* Programmable Clock 0 */ #define AT91_PMC_PCK1RDY (1 << 9) /* Programmable Clock 1 */ -- cgit v0.10.2 From 5beae6efd1004b44c3e257dc96087978e4c763c1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:16:21 -0400 Subject: tracing: fix ref count in splice pages The pages allocated for the splice binary buffer did not initialize the ref count correctly. This caused pages not to be freed and causes a drastic memory leak. Thanks to logdev I was able to trace the tracer to find where the leak was. [ Impact: stop memory leak when using splice ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5d704a4..9058240 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3531,6 +3531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, if (!ref) break; + ref->ref = 1; ref->buffer = info->tr->buffer; ref->page = ring_buffer_alloc_read_page(ref->buffer); if (!ref->page) { -- cgit v0.10.2 From 93459c6cb9816c52200993d29dd18cea1daee335 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:23:13 -0400 Subject: tracing: only add splice page if entries exist The splice code allocates a page even when the ring buffer is empty. It detects the ring buffer being empty when it it fails to copy anything from the ring buffer into the page. This patch adds a check to see if there is anything in the ring buffer before allocating a page. Thanks to logdev for letting me trace the tracer to find this. [ Impact: speed up due to removing unnecessary allocation ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9058240..0aeb3b9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3508,7 +3508,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, .spd_release = buffer_spd_release, }; struct buffer_ref *ref; - int size, i; + int entries, size, i; size_t ret; if (*ppos & (PAGE_SIZE - 1)) { @@ -3523,7 +3523,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, len &= PAGE_MASK; } - for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); + + for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { struct page *page; int r; @@ -3564,6 +3566,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, spd.partial[i].private = (unsigned long)ref; spd.nr_pages++; *ppos += PAGE_SIZE; + + entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); } spd.nr_pages = i; -- cgit v0.10.2 From f2957f1f196b0217644a17c1379855a118a37d72 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 29 Apr 2009 00:26:30 -0400 Subject: tracing: have splice only copy full pages Splice works with pages, it is much more effecient to use an entire page than to copy bits over several pages. Using logdev to trace the internals of the splice mechanism, I was able to see that splice can be very aggressive. When tracing is occurring, and the reader caught up to the writer, and the writer is on the reader page, the reader will copy what is there into the splice page. Splice may iterate over several pages and if the writer is still writing to the page, the reader will keep copying bits to new pages to pass to userspace. This patch changes it to only pass data to userspace if the page is full (the writer has left the page). This has a small side effect that splice can not read a partial page, and must wait for the page to fill. This should not be an issue. If tracing has stopped, then a use of "read" will still read all of the page. [ Impact: better performance for ring buffer splice code ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0aeb3b9..f5427e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3542,7 +3542,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } r = ring_buffer_read_page(ref->buffer, &ref->page, - len, info->cpu, 0); + len, info->cpu, 1); if (r < 0) { ring_buffer_free_read_page(ref->buffer, ref->page); -- cgit v0.10.2 From 7d7d2b803159d4edeb051b0e5efbc1a8d9ef1c67 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 27 Apr 2009 12:37:49 -0400 Subject: ring-buffer: fix printk output The warning output in trace_recursive_lock uses %d for a long when it should be %ld. [ Impact: fix compile warning ] Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9692f10..f4cc590 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1491,7 +1491,7 @@ static int trace_recursive_lock(void) /* Disable all tracing before we do anything else */ tracing_off_permanent(); - printk_once(KERN_WARNING "Tracing recursion: depth[%d]:" + printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" "HC[%lu]:SC[%lu]:NMI[%lu]\n", current->trace_recursion, hardirq_count() >> HARDIRQ_SHIFT, -- cgit v0.10.2 From aee6a166a5401dcfcb17fcdc055e5edf2a4f4042 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:17 +0200 Subject: x86: beautify vmlinux_32.lds.S Beautify vmlinux_32.lds.S: - Use tabs for indent - Located curly braces like in C code - Rearranged a few comments To see actual differences use "git diff -b" which ignore 'whitespace' changes. The beautification is done to prepare a unification of the _32 and _64 variants of the linker scripts. [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-1-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 62ad500..fffa45a 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -22,196 +22,220 @@ ENTRY(phys_startup_32) jiffies = jiffies_64; PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(0); /* ___ */ } SECTIONS { - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; - - .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { - _text = .; /* Text and read-only data */ - *(.text.head) - } :text = 0x9090 - - /* read-only */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ - *(.text.page_aligned) - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - _etext = .; /* End of text section */ - } :text = 0x9090 - - NOTES :text :note - - . = ALIGN(16); /* Exception table */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - - /* writeable */ - . = ALIGN(PAGE_SIZE); - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - DATA_DATA - CONSTRUCTORS + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; + phys_startup_32 = startup_32 - LOAD_OFFSET; + + /* Text and read-only data */ + .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { + _text = .; + *(.text.head) + } :text = 0x9090 + + /* read-only */ + .text : AT(ADDR(.text) - LOAD_OFFSET) { + /* not really needed, already page aligned */ + . = ALIGN(PAGE_SIZE); + *(.text.page_aligned) + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + /* End of text section */ + _etext = .; + } :text = 0x9090 + + NOTES :text :note + + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } :text = 0x9090 + + RODATA + + /* writeable */ + . = ALIGN(PAGE_SIZE); + /* Data */ + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS } :data - . = ALIGN(PAGE_SIZE); - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) - *(.data.idt) - } - - . = ALIGN(32); - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) - } - - /* rarely changed data like cpu maps */ - . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - _edata = .; /* End of data section */ - } - - . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) - } - - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - } - /* will be freed after init - * Following ALIGN() is required to make sure no other data falls on the - * same page where __smp_alt_end is pointing as that page might be freed - * after boot. Always make sure that ALIGN() directive is present after - * the section which contains __smp_alt_end. - */ - . = ALIGN(PAGE_SIZE); - - /* will be freed after init */ - . = ALIGN(PAGE_SIZE); /* Init code and data */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - __init_begin = .; - _sinittext = .; - INIT_TEXT - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - . = ALIGN(16); - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - SECURITY_INIT - . = ALIGN(4); - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - . = ALIGN(4); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } + . = ALIGN(PAGE_SIZE); + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } + + . = ALIGN(PAGE_SIZE); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) + *(.data.idt) + } + + . = ALIGN(32); + .data.cacheline_aligned : + AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } + + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + + /* End of data section */ + _edata = .; + } + + /* init_task */ + . = ALIGN(THREAD_SIZE); + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } + + . = ALIGN(PAGE_SIZE); + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + /* might get freed after init */ + __smp_locks = .; + *(.smp_locks) + __smp_locks_end = .; + } + /* will be freed after init + * Following ALIGN() is required to make sure no other data falls on the + * same page where __smp_alt_end is pointing as that page might be freed + * after boot. Always make sure that ALIGN() directive is present after + * the section which contains __smp_alt_end. + */ + . = ALIGN(PAGE_SIZE); + + /* Init code and data - will be freed after init */ + . = ALIGN(PAGE_SIZE); + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + __init_begin = .; + _sinittext = .; + INIT_TEXT + _einittext = .; + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + INIT_DATA + } + + . = ALIGN(16); + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; + INITCALLS + __initcall_end = .; + } + + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; + } + + SECURITY_INIT + + . = ALIGN(4); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } + + . = ALIGN(4); + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { + __parainstructions = .; + *(.parainstructions) + __parainstructions_end = .; + } + + /* + * .exit.text is discard at runtime, not link time, to deal with + * references from .altinstructions and .eh_frame + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } + #if defined(CONFIG_BLK_DEV_INITRD) - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } + . = ALIGN(PAGE_SIZE); + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } #endif - PERCPU(PAGE_SIZE) - . = ALIGN(PAGE_SIZE); - /* freed after init ends here */ - - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __init_end = .; - __bss_start = .; /* BSS */ - *(.bss.page_aligned) - *(.bss) - . = ALIGN(4); - __bss_stop = .; - } - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { + PERCPU(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); - __brk_base = . ; - . += 64 * 1024 ; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = . ; - } + /* freed after init ends here */ + + /* BSS */ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __init_end = .; + __bss_start = .; + *(.bss.page_aligned) + *(.bss) + . = ALIGN(4); + __bss_stop = .; + } - .end : AT(ADDR(.end) - LOAD_OFFSET) { - _end = . ; - } + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __brk_base = .; + . += 64 * 1024; /* 64k alignment slop space */ + *(.brk_reservation) /* areas brk users have reserved */ + __brk_limit = .; + } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) + .end : AT(ADDR(.end) - LOAD_OFFSET) { + _end = . ; } - STABS_DEBUG + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) + *(.discard) + } - DWARF_DEBUG + STABS_DEBUG + DWARF_DEBUG } /* -- cgit v0.10.2 From 17ce265d6a1789eae5eb739a3bb7fcffdb3e87c5 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:18 +0200 Subject: x86, vmlinux.lds: unify header/footer Merge everything except PHDRS and SECTIONS into vmlinux.lds.S. [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-2-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 849ee61..d113642 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -1,5 +1,82 @@ +/* + * ld script for the x86 kernel + * + * Historic 32-bit version written by Martin Mares + * + * Modernisation and unification done by Sam Ravnborg + * + * + * Don't define absolute symbols until and unless you know that symbol + * value is should remain constant even if kernel image is relocated + * at run time. Absolute symbols are not relocated. If symbol value should + * change if kernel is relocated, make the symbol section relative and + * put it inside the section definition. + */ + +#ifdef CONFIG_X86_32 +#define LOAD_OFFSET __PAGE_OFFSET +#else +#define LOAD_OFFSET __START_KERNEL_map +#endif + +#include +#include +#include +#include +#include +#include + +#undef i386 /* in case the preprocessor is a 32bit one */ + +OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) + +#ifdef CONFIG_X86_32 +OUTPUT_ARCH(i386) +ENTRY(phys_startup_32) +jiffies = jiffies_64; +#else +OUTPUT_ARCH(i386:x86-64) +ENTRY(phys_startup_64) +jiffies_64 = jiffies; +#endif + + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" #else # include "vmlinux_64.lds.S" #endif + + +#ifdef CONFIG_X86_32 +ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") +#else +/* + * Per-cpu symbols which need to be offset from __per_cpu_load + * for the boot processor. + */ +#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load +INIT_PER_CPU(gdt_page); +INIT_PER_CPU(irq_stack_union); + +/* + * Build-time check on the image size: + */ +ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") + +#ifdef CONFIG_SMP +ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); +#endif + +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_KEXEC +#include + +ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big") +#endif + diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index fffa45a..4c985fc 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,26 +1,3 @@ -/* ld script to make i386 Linux kernel - * Written by Martin Mares ; - * - * Don't define absolute symbols until and unless you know that symbol - * value is should remain constant even if kernel image is relocated - * at run time. Absolute symbols are not relocated. If symbol value should - * change if kernel is relocated, make the symbol section relative and - * put it inside the section definition. - */ - -#define LOAD_OFFSET __PAGE_OFFSET - -#include -#include -#include -#include -#include - -OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") -OUTPUT_ARCH(i386) -ENTRY(phys_startup_32) -jiffies = jiffies_64; - PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ @@ -237,17 +214,3 @@ SECTIONS STABS_DEBUG DWARF_DEBUG } - -/* - * Build-time check on the image size: - */ -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") - -#ifdef CONFIG_KEXEC -/* Link time checks */ -#include - -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") -#endif diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 6d5a5b0..7f1cc3d 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,19 +1,3 @@ -/* ld script to make x86-64 Linux kernel - * Written by Martin Mares ; - */ - -#define LOAD_OFFSET __START_KERNEL_map - -#include -#include -#include - -#undef i386 /* in case the preprocessor is a 32bit one */ - -OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") -OUTPUT_ARCH(i386:x86-64) -ENTRY(phys_startup_64) -jiffies_64 = jiffies; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ data PT_LOAD FLAGS(7); /* RWE */ @@ -308,29 +292,3 @@ SECTIONS STABS_DEBUG DWARF_DEBUG } - -/* - * Per-cpu symbols which need to be offset from __per_cpu_load - * for the boot processor. - */ -#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load -INIT_PER_CPU(gdt_page); -INIT_PER_CPU(irq_stack_union); - -/* - * Build-time check on the image size: - */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") - -#ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); -#endif - -#ifdef CONFIG_KEXEC -#include - -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") -#endif -- cgit v0.10.2 From afb8095a7eab32e5760613fa73d2f80a39cc45bf Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:19 +0200 Subject: x86, vmlinux.lds: unify PHDRS PHDRS are not equal for the two - so use ifdefs to cover up for that. On the assumption that they may become equal the ifdef is inside the PHDRS definiton. [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-3-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index d113642..1a1b303 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -40,6 +40,19 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; #endif +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_X86_64 + user PT_LOAD FLAGS(7); /* RWE */ + data.init PT_LOAD FLAGS(7); /* RWE */ +#ifdef CONFIG_SMP + percpu PT_LOAD FLAGS(7); /* RWE */ +#endif + data.init2 PT_LOAD FLAGS(7); /* RWE */ +#endif + note PT_NOTE FLAGS(0); /* ___ */ +} #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 4c985fc..4fd40dc 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,8 +1,3 @@ -PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ -} SECTIONS { . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 7f1cc3d..6e7cbee 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,14 +1,3 @@ -PHDRS { - text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ - user PT_LOAD FLAGS(7); /* RWE */ - data.init PT_LOAD FLAGS(7); /* RWE */ -#ifdef CONFIG_SMP - percpu PT_LOAD FLAGS(7); /* RWE */ -#endif - data.init2 PT_LOAD FLAGS(7); /* RWE */ - note PT_NOTE FLAGS(0); /* ___ */ -} SECTIONS { . = __START_KERNEL; -- cgit v0.10.2 From 444e0ae4831f99ba25062d9a5ccb7117c62841a0 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:20 +0200 Subject: x86, vmlinux.lds: unify start/end of SECTIONS [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-4-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1a1b303..845776f 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -54,12 +54,26 @@ PHDRS { note PT_NOTE FLAGS(0); /* ___ */ } +SECTIONS +{ +#ifdef CONFIG_X86_32 + . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; + phys_startup_32 = startup_32 - LOAD_OFFSET; +#else + . = __START_KERNEL; + phys_startup_64 = startup_64 - LOAD_OFFSET; +#endif + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" #else # include "vmlinux_64.lds.S" #endif + STABS_DEBUG + DWARF_DEBUG +} + #ifdef CONFIG_X86_32 ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 4fd40dc..3d3d49c 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,8 +1,3 @@ -SECTIONS -{ - . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; - /* Text and read-only data */ .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { _text = .; @@ -205,7 +200,3 @@ SECTIONS *(.exitcall.exit) *(.discard) } - - STABS_DEBUG - DWARF_DEBUG -} diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 6e7cbee..2d7fa20 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,8 +1,3 @@ -SECTIONS -{ - . = __START_KERNEL; - phys_startup_64 = startup_64 - LOAD_OFFSET; - /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { _text = .; @@ -277,7 +272,3 @@ SECTIONS *(.eh_frame) *(.discard) } - - STABS_DEBUG - DWARF_DEBUG -} -- cgit v0.10.2 From dfc20895d944cfa81d8ff00809b68ecb8f72cbb0 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:21 +0200 Subject: x86, vmlinux.lds: unify .text output sections 32 bit x86 had a dedicated .text.head output section, whereas 64 bit had it all in a single output section. In the unified version the dedicated .text.head output section was kept to have full control over the head code. 32 bit: - Moved definition of _stext to the linker script. The definition is located _after_ .text.page_aligned as this is what 32 bit did before. The ALIGN(8) was introduced so we hit the exact same address (on the tested config) before and after the move. I assume that it is a bug that _stext did not cover the .text.page_aligned section - if this is true it can be fixed in a follow-up patch (and the ugly ALIGN() can be dropped). [ Impact: 64-bit: cleanup, 32-bit: use the 64-bit linker script ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-5-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 3068388..dc5ed4b 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -608,13 +608,6 @@ ignore_int: ENTRY(initial_code) .long i386_start_kernel -.section .text -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - /* * BSS section */ diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 845776f..a7c88bb 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -64,6 +64,37 @@ SECTIONS phys_startup_64 = startup_64 - LOAD_OFFSET; #endif + /* Text and read-only data */ + + /* bootstrapping code */ + .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { + _text = .; + *(.text.head) + } :text = 0x9090 + + /* The rest of the text */ + .text : AT(ADDR(.text) - LOAD_OFFSET) { +#ifdef CONFIG_X86_32 + /* not really needed, already page aligned */ + . = ALIGN(PAGE_SIZE); + *(.text.page_aligned) +#endif + . = ALIGN(8); + _stext = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + *(.fixup) + *(.gnu.warning) + /* End of text section */ + _etext = .; + } :text = 0x9090 + + NOTES :text :note + + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" #else diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 3d3d49c..8540092 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,27 +1,3 @@ - /* Text and read-only data */ - .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { - _text = .; - *(.text.head) - } :text = 0x9090 - - /* read-only */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - /* not really needed, already page aligned */ - . = ALIGN(PAGE_SIZE); - *(.text.page_aligned) - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - /* End of text section */ - _etext = .; - } :text = 0x9090 - - NOTES :text :note - /* Exception table */ . = ALIGN(16); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 2d7fa20..b5d4367 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,23 +1,3 @@ - /* Text and read-only data */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - _text = .; - /* First the code that has to be first for bootstrapping */ - *(.text.head) - _stext = .; - /* Then the rest */ - TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - IRQENTRY_TEXT - *(.fixup) - *(.gnu.warning) - /* End of text section */ - _etext = .; - } :text = 0x9090 - - NOTES :text :note - /* Exception table */ . = ALIGN(16); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { -- cgit v0.10.2 From 448bc3ab0d03e77fee8e4264de0d001fc87bc164 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:22 +0200 Subject: x86, vmlinux.lds: unify exception table [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-6-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index a7c88bb..67164f6 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -94,6 +94,16 @@ SECTIONS NOTES :text :note + /* Exception table */ + . = ALIGN(16); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } :text = 0x9090 + + RODATA + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 8540092..920cc69 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,13 +1,3 @@ - /* Exception table */ - . = ALIGN(16); - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - /* writeable */ . = ALIGN(PAGE_SIZE); /* Data */ diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index b5d4367..641f3f9 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,13 +1,3 @@ - /* Exception table */ - . = ALIGN(16); - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :text = 0x9090 - - RODATA - /* Align data segment to page size boundary */ . = ALIGN(PAGE_SIZE); /* Data */ -- cgit v0.10.2 From 1f6397bac55040cd520d9eaf299e155a7aa01d5f Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:23 +0200 Subject: x86, vmlinux.lds: unify data output sections For 64 bit the following functional changes are introduced: - .data.page_aligned has moved - .data.cacheline_aligned has moved - .data.read_mostly has moved - ALIGN() moved out of output section for .data.cacheline_aligned - ALIGN() moved out of output section for .data.page_aligned Notice that 32 bit and 64 bit has different location of _edata. .data_nosave is 32 bit only as 64 bit is special due to PERCPU. [ Impact: 32-bit: cleanup, 64-bit: use 32-bit linker script ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-7-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 67164f6..067bdb0 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -104,6 +104,61 @@ SECTIONS RODATA + /* Data */ + . = ALIGN(PAGE_SIZE); + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS + +#ifdef CONFIG_X86_64 + /* End of data section */ + _edata = .; +#endif + } :data + +#ifdef CONFIG_X86_32 + /* 32 bit has nosave before _edata */ + . = ALIGN(PAGE_SIZE); + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } +#endif + + . = ALIGN(PAGE_SIZE); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { + *(.data.page_aligned) + *(.data.idt) + } + +#ifdef CONFIG_X86_32 + . = ALIGN(32); +#else + . = ALIGN(PAGE_SIZE); + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); +#endif + .data.cacheline_aligned : + AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } + + /* rarely changed data like cpu maps */ +#ifdef CONFIG_X86_32 + . = ALIGN(32); +#else + . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); +#endif + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + +#ifdef CONFIG_X86_32 + /* End of data section */ + _edata = .; +#endif + } + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 920cc69..8ade846 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,40 +1,3 @@ - /* writeable */ - . = ALIGN(PAGE_SIZE); - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - DATA_DATA - CONSTRUCTORS - } :data - - . = ALIGN(PAGE_SIZE); - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) - *(.data.idt) - } - - . = ALIGN(32); - .data.cacheline_aligned : - AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) - } - - /* rarely changed data like cpu maps */ - . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - - /* End of data section */ - _edata = .; - } - /* init_task */ . = ALIGN(THREAD_SIZE); .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 641f3f9..8262701 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,26 +1,3 @@ - /* Align data segment to page size boundary */ - . = ALIGN(PAGE_SIZE); - /* Data */ - .data : AT(ADDR(.data) - LOAD_OFFSET) { - DATA_DATA - CONSTRUCTORS - /* End of data section */ - _edata = .; - } :data - - - .data.cacheline_aligned : - AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - *(.data.cacheline_aligned) - } - - . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - } - #define VSYSCALL_ADDR (-10*1024*1024) #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ SIZEOF(.data.read_mostly) + 4095) & ~(4095)) @@ -95,11 +72,6 @@ *(.data.init_task) } :data.init - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - *(.data.page_aligned) - } - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { /* might get freed after init */ . = ALIGN(PAGE_SIZE); -- cgit v0.10.2 From ff6f87e1626e10beef675084c9b5384a9477e3d5 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:24 +0200 Subject: x86, vmlinux.lds: move vsyscall output sections [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-8-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 067bdb0..b3106c2 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -159,6 +159,77 @@ SECTIONS #endif } +#ifdef CONFIG_X86_64 + +#define VSYSCALL_ADDR (-10*1024*1024) +#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) + +#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) +#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) + +#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) +#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) + + . = VSYSCALL_ADDR; + .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { + *(.vsyscall_0) + } :user + + __vsyscall_0 = VSYSCALL_VIRT_ADDR; + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { + *(.vsyscall_fn) + } + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { + *(.vsyscall_gtod_data) + } + + vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); + .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) { + *(.vsyscall_clock) + } + vsyscall_clock = VVIRT(.vsyscall_clock); + + + .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { + *(.vsyscall_1) + } + .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { + *(.vsyscall_2) + } + + .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { + *(.vgetcpu_mode) + } + vgetcpu_mode = VVIRT(.vgetcpu_mode); + + . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .jiffies : AT(VLOAD(.jiffies)) { + *(.jiffies) + } + jiffies = VVIRT(.jiffies); + + .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { + *(.vsyscall_3) + } + + . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; + +#undef VSYSCALL_ADDR +#undef VSYSCALL_PHYS_ADDR +#undef VSYSCALL_VIRT_ADDR +#undef VLOAD_OFFSET +#undef VLOAD +#undef VVIRT_OFFSET +#undef VVIRT + +#endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 8262701..013aa0e 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,71 +1,3 @@ -#define VSYSCALL_ADDR (-10*1024*1024) -#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ - SIZEOF(.data.read_mostly) + 4095) & ~(4095)) -#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ - SIZEOF(.data.read_mostly) + 4095) & ~(4095)) - -#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) -#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) - -#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) -#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) - - . = VSYSCALL_ADDR; - .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { - *(.vsyscall_0) - } :user - - __vsyscall_0 = VSYSCALL_VIRT_ADDR; - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { - *(.vsyscall_fn) - } - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { - *(.vsyscall_gtod_data) - } - - vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); - .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) { - *(.vsyscall_clock) - } - vsyscall_clock = VVIRT(.vsyscall_clock); - - - .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { - *(.vsyscall_1) - } - .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { - *(.vsyscall_2) - } - - .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { - *(.vgetcpu_mode) - } - vgetcpu_mode = VVIRT(.vgetcpu_mode); - - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); - .jiffies : AT(VLOAD(.jiffies)) { - *(.jiffies) - } - jiffies = VVIRT(.jiffies); - - .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { - *(.vsyscall_3) - } - - . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; - -#undef VSYSCALL_ADDR -#undef VSYSCALL_PHYS_ADDR -#undef VSYSCALL_VIRT_ADDR -#undef VLOAD_OFFSET -#undef VLOAD -#undef VVIRT_OFFSET -#undef VVIRT - /* init_task */ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { . = ALIGN(THREAD_SIZE); -- cgit v0.10.2 From e58bdaa8f810332e5c1760ce496b01e07d51642c Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:25 +0200 Subject: x86, vmlinux.lds: unify first part of initdata 32-bit: - Move definition of __init_begin outside output_section because it covers more than one section - Move ALIGN() for end-of-section inside .smp_locks output section. Same effect but the intent is better documented that we need both start and end aligned. 64-bit: - Move ALIGN() outside output section in .init.setup - Deleted unused __smp_alt_* symbols None of the above should result in any functional change. [ Impact: refactor and unify linker script ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-9-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index b3106c2..8b203c4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -231,6 +231,67 @@ SECTIONS #endif /* CONFIG_X86_64 */ + /* init_task */ + . = ALIGN(THREAD_SIZE); + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } +#ifdef CONFIG_X86_64 + :data.init +#endif + + /* + * smp_locks might be freed after init + * start/end must be page aligned + */ + . = ALIGN(PAGE_SIZE); + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + __smp_locks = .; + *(.smp_locks) + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + } + + /* Init code and data - will be freed after init */ + . = ALIGN(PAGE_SIZE); + __init_begin = .; /* paired with __init_end */ + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + _sinittext = .; + INIT_TEXT + _einittext = .; + } + + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + INIT_DATA + } + + . = ALIGN(16); + .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { + __initcall_start = .; + INITCALLS + __initcall_end = .; + } + + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { + __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) + __x86_cpu_dev_end = .; + } + + SECURITY_INIT + + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" #else diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 8ade846..d8ba539 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,63 +1,3 @@ - /* init_task */ - . = ALIGN(THREAD_SIZE); - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) - } - - . = ALIGN(PAGE_SIZE); - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - /* might get freed after init */ - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - } - /* will be freed after init - * Following ALIGN() is required to make sure no other data falls on the - * same page where __smp_alt_end is pointing as that page might be freed - * after boot. Always make sure that ALIGN() directive is present after - * the section which contains __smp_alt_end. - */ - . = ALIGN(PAGE_SIZE); - - /* Init code and data - will be freed after init */ - . = ALIGN(PAGE_SIZE); - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - __init_begin = .; - _sinittext = .; - INIT_TEXT - _einittext = .; - } - - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - INIT_DATA - } - - . = ALIGN(16); - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - - SECURITY_INIT - . = ALIGN(4); .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { __alt_instructions = .; diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 013aa0e..0e8054e 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,62 +1,3 @@ - /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - . = ALIGN(THREAD_SIZE); - *(.data.init_task) - } :data.init - - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - /* might get freed after init */ - . = ALIGN(PAGE_SIZE); - __smp_alt_begin = .; - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - . = ALIGN(PAGE_SIZE); - __smp_alt_end = .; - } - - /* Init code and data */ - . = ALIGN(PAGE_SIZE); - __init_begin = .; /* paired with __init_end */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - _sinittext = .; - INIT_TEXT - _einittext = .; - } - - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { - __initdata_begin = .; - INIT_DATA - __initdata_end = .; - } - - .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { - . = ALIGN(16); - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - - .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - - .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - - .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { - __x86_cpu_dev_start = .; - *(.x86_cpu_dev.init) - __x86_cpu_dev_end = .; - } - - SECURITY_INIT - . = ALIGN(8); .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { __parainstructions = .; -- cgit v0.10.2 From ae61836289a415351caa524d328110aaeae100d4 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:26 +0200 Subject: x86, vmlinux.lds: unify parainstructions 32 bit: - increase alignment from 4 to 8 for .parainstructions - increase alignment from 4 to 8 for .altinstructions 64 bit: - move ALIGN() outside output section for .altinstructions None of the above should result in any functional change. [ Impact: refactor and unify linker script ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-10-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 8b203c4..c8dd71e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -291,6 +291,24 @@ SECTIONS SECURITY_INIT + . = ALIGN(8); + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { + __parainstructions = .; + *(.parainstructions) + __parainstructions_end = .; + } + + . = ALIGN(8); + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index d8ba539..5df9647 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,21 +1,3 @@ - . = ALIGN(4); - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - - . = ALIGN(4); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - /* * .exit.text is discard at runtime, not link time, to deal with * references from .altinstructions and .eh_frame diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 0e8054e..9ef7096 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,21 +1,3 @@ - . = ALIGN(8); - .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { - __parainstructions = .; - *(.parainstructions) - __parainstructions_end = .; - } - - .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { - . = ALIGN(8); - __alt_instructions = .; - *(.altinstructions) - __alt_instructions_end = .; - } - - .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { - *(.altinstr_replacement) - } - /* * .exit.text is discard at runtime, not link time, to deal with * references from .altinstructions and .eh_frame -- cgit v0.10.2 From bf6a57418d5445c98047edbec022c9e54d1526e6 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:27 +0200 Subject: x86, vmlinux.lds: unify .exit.* and .init.ramfs [ Impact: cleanup ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-11-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index c8dd71e..1ab62a5 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -309,6 +309,26 @@ SECTIONS *(.altinstr_replacement) } + /* + * .exit.text is discard at runtime, not link time, to deal with + * references from .altinstructions and .eh_frame + */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { + EXIT_TEXT + } + + .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { + EXIT_DATA + } + +#ifdef CONFIG_BLK_DEV_INITRD + . = ALIGN(PAGE_SIZE); + .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { + __initramfs_start = .; + *(.init.ramfs) + __initramfs_end = .; + } +#endif #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 5df9647..36c8fbd 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,24 +1,3 @@ - /* - * .exit.text is discard at runtime, not link time, to deal with - * references from .altinstructions and .eh_frame - */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } - -#if defined(CONFIG_BLK_DEV_INITRD) - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 9ef7096..1aa5362 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,24 +1,3 @@ - /* - * .exit.text is discard at runtime, not link time, to deal with - * references from .altinstructions and .eh_frame - */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { - EXIT_TEXT - } - - .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { - EXIT_DATA - } - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(PAGE_SIZE); - .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { - __initramfs_start = .; - *(.init.ramfs) - __initramfs_end = .; - } -#endif - #ifdef CONFIG_SMP /* * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the -- cgit v0.10.2 From 9d16e78318f174fd4b07916a93e41749d5199267 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:28 +0200 Subject: x86, vmlinux.lds: unify percpu 32 bit: - move __init_end outside the .bss output section It really did not belong in there [ Impact: 64-bit: cleanup, 32-bit: refactor linker script ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-12-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1ab62a5..1ea2b85 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -330,6 +330,36 @@ SECTIONS } #endif +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) + /* + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - __data_nosave - should + * start another section data.init2. Also, pda should be at the head of + * percpu area. Preallocate it and define the percpu offset symbol + * so that it can be accessed as a percpu variable. + */ + . = ALIGN(PAGE_SIZE); + PERCPU_VADDR(0, :percpu) +#else + PERCPU(PAGE_SIZE) +#endif + + . = ALIGN(PAGE_SIZE); + /* freed after init ends here */ + __init_end = .; + +#ifdef CONFIG_X86_64 + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + } :data.init2 + /* use another section data.init2, see PERCPU_VADDR() above */ +#endif + + #ifdef CONFIG_X86_32 # include "vmlinux_32.lds.S" #else diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 36c8fbd..d23ee2c 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -1,11 +1,5 @@ - PERCPU(PAGE_SIZE) - - . = ALIGN(PAGE_SIZE); - /* freed after init ends here */ - /* BSS */ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __init_end = .; __bss_start = .; *(.bss.page_aligned) *(.bss) diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 1aa5362..a539366 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -1,29 +1,3 @@ -#ifdef CONFIG_SMP - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - __data_nosave - should - * start another section data.init2. Also, pda should be at the head of - * percpu area. Preallocate it and define the percpu offset symbol - * so that it can be accessed as a percpu variable. - */ - . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) -#else - PERCPU(PAGE_SIZE) -#endif - - . = ALIGN(PAGE_SIZE); - __init_end = .; - - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } :data.init2 - /* use another section data.init2, see PERCPU_VADDR() above */ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { . = ALIGN(PAGE_SIZE); __bss_start = .; /* BSS */ -- cgit v0.10.2 From 091e52c3551d3031343df24b573b770b4c6c72b6 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Wed, 29 Apr 2009 09:47:29 +0200 Subject: x86, vmlinux.lds: unify remaining parts 32 bit: - explicit page align .bss - move ALING() out of .brk output section - discard *(.eh_frame) 64 bit: - move ALIGN() out of .bss output section - move ALIGN() out of .brk output section - use a dedicated section to define _end [ Impact: unify and fix section alignments in linker script ] Signed-off-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-13-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 1ea2b85..ef3e4f1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -359,12 +359,34 @@ SECTIONS /* use another section data.init2, see PERCPU_VADDR() above */ #endif + /* BSS */ + . = ALIGN(PAGE_SIZE); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { + __bss_start = .; + *(.bss.page_aligned) + *(.bss) + . = ALIGN(4); + __bss_stop = .; + } -#ifdef CONFIG_X86_32 -# include "vmlinux_32.lds.S" -#else -# include "vmlinux_64.lds.S" -#endif + . = ALIGN(PAGE_SIZE); + .brk : AT(ADDR(.brk) - LOAD_OFFSET) { + __brk_base = .; + . += 64 * 1024; /* 64k alignment slop space */ + *(.brk_reservation) /* areas brk users have reserved */ + __brk_limit = .; + } + + .end : AT(ADDR(.end) - LOAD_OFFSET) { + _end = .; + } + + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) + *(.eh_frame) + *(.discard) + } STABS_DEBUG DWARF_DEBUG diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S deleted file mode 100644 index d23ee2c..0000000 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ /dev/null @@ -1,26 +0,0 @@ - /* BSS */ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __bss_start = .; - *(.bss.page_aligned) - *(.bss) - . = ALIGN(4); - __bss_stop = .; - } - - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __brk_base = .; - . += 64 * 1024; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = .; - } - - .end : AT(ADDR(.end) - LOAD_OFFSET) { - _end = . ; - } - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S deleted file mode 100644 index a539366..0000000 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ /dev/null @@ -1,24 +0,0 @@ - .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __bss_start = .; /* BSS */ - *(.bss.page_aligned) - *(.bss) - __bss_stop = .; - } - - .brk : AT(ADDR(.brk) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __brk_base = .; - . += 64 * 1024; /* 64k alignment slop space */ - *(.brk_reservation) /* areas brk users have reserved */ - __brk_limit = .; - } - - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } -- cgit v0.10.2 From 91fd7fe809bdf4d8aa56559d17b9f25a1a6fe732 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2009 10:58:38 +0200 Subject: x86, vmlinux.lds: add copyright Acked-by: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <1240991249-27117-2-git-send-email-sam@ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ef3e4f1..0bdbaa5 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -3,7 +3,8 @@ * * Historic 32-bit version written by Martin Mares * - * Modernisation and unification done by Sam Ravnborg + * Modernisation, unification and other changes and fixes: + * Copyright (C) 2007-2009 Sam Ravnborg * * * Don't define absolute symbols until and unless you know that symbol -- cgit v0.10.2 From 0492e1bb8fe7d122901c9f3af75e537d4129712e Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Tue, 28 Apr 2009 20:17:49 +0100 Subject: tracing: x86, mmiotrace: code consistency/legibility improvement kmmio_probe being *p and kmmio_fault_page being sometimes *f and sometimes *p is not helpful. [ Impact: cleanup ] Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Cc: Steven Rostedt LKML-Reference: <1240946271-7083-3-git-send-email-stuart@freedesktop.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 4f115e0..869181a 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -97,13 +97,13 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) { struct list_head *head; - struct kmmio_fault_page *p; + struct kmmio_fault_page *f; page &= PAGE_MASK; head = kmmio_page_list(page); - list_for_each_entry_rcu(p, head, list) { - if (p->page == page) - return p; + list_for_each_entry_rcu(f, head, list) { + if (f->page == page) + return f; } return NULL; } @@ -439,12 +439,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) head, struct kmmio_delayed_release, rcu); - struct kmmio_fault_page *p = dr->release_list; - while (p) { - struct kmmio_fault_page *next = p->release_next; - BUG_ON(p->count); - kfree(p); - p = next; + struct kmmio_fault_page *f = dr->release_list; + while (f) { + struct kmmio_fault_page *next = f->release_next; + BUG_ON(f->count); + kfree(f); + f = next; } kfree(dr); } @@ -453,19 +453,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) { struct kmmio_delayed_release *dr = container_of(head, struct kmmio_delayed_release, rcu); - struct kmmio_fault_page *p = dr->release_list; + struct kmmio_fault_page *f = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; spin_lock_irqsave(&kmmio_lock, flags); - while (p) { - if (!p->count) { - list_del_rcu(&p->list); - prevp = &p->release_next; + while (f) { + if (!f->count) { + list_del_rcu(&f->list); + prevp = &f->release_next; } else { - *prevp = p->release_next; + *prevp = f->release_next; } - p = p->release_next; + f = f->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); -- cgit v0.10.2 From 46e91d00b1165b14b484aa33800e1bba0794ae1a Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Tue, 28 Apr 2009 20:17:50 +0100 Subject: tracing: x86, mmiotrace: refactor clearing/restore of page presence * change function names to clear_* from set_*: in reality we only clear and restore page presence, and never unconditionally set present. Using clear_*({true, false}, ...) is therefore more honest than set_*({false, true}, ...) * upgrade presence storage to pteval_t: doing user-space tracing will require saving and manipulation of the _PAGE_PROTNONE bit, in addition to the existing _PAGE_PRESENT changes, and having multiple bools stored and passed around does not seem optimal [ Impact: refactor, clean up mmiotrace code ] Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Cc: Steven Rostedt LKML-Reference: <1240946271-7083-4-git-send-email-stuart@freedesktop.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 869181a..a769d1a 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -32,7 +32,7 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; unsigned long page; /* location of the fault page */ - bool old_presence; /* page presence prior to arming */ + pteval_t old_presence; /* page presence prior to arming */ bool armed; /* @@ -108,49 +108,51 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) +static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) { pmdval_t v = pmd_val(*pmd); - *old = !!(v & _PAGE_PRESENT); - v &= ~_PAGE_PRESENT; - if (present) - v |= _PAGE_PRESENT; + if (clear) { + *old = v & _PAGE_PRESENT; + v &= ~_PAGE_PRESENT; + } else /* presume this has been called with clear==true previously */ + v |= *old; set_pmd(pmd, __pmd(v)); } -static void set_pte_presence(pte_t *pte, bool present, bool *old) +static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) { pteval_t v = pte_val(*pte); - *old = !!(v & _PAGE_PRESENT); - v &= ~_PAGE_PRESENT; - if (present) - v |= _PAGE_PRESENT; + if (clear) { + *old = v & _PAGE_PRESENT; + v &= ~_PAGE_PRESENT; + } else /* presume this has been called with clear==true previously */ + v |= *old; set_pte_atomic(pte, __pte(v)); } -static int set_page_presence(unsigned long addr, bool present, bool *old) +static int clear_page_presence(struct kmmio_fault_page *f, bool clear) { unsigned int level; - pte_t *pte = lookup_address(addr, &level); + pte_t *pte = lookup_address(f->page, &level); if (!pte) { - pr_err("kmmio: no pte for page 0x%08lx\n", addr); + pr_err("kmmio: no pte for page 0x%08lx\n", f->page); return -1; } switch (level) { case PG_LEVEL_2M: - set_pmd_presence((pmd_t *)pte, present, old); + clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence); break; case PG_LEVEL_4K: - set_pte_presence(pte, present, old); + clear_pte_presence(pte, clear, &f->old_presence); break; default: pr_err("kmmio: unexpected page level 0x%x.\n", level); return -1; } - __flush_tlb_one(addr); + __flush_tlb_one(f->page); return 0; } @@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); if (f->armed) { pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, f->old_presence); + f->page, f->count, !!f->old_presence); } - ret = set_page_presence(f->page, false, &f->old_presence); + ret = clear_page_presence(f, true); WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); f->armed = true; return ret; @@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) /** Restore the given page to saved presence state. */ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { - bool tmp; - int ret = set_page_presence(f->page, f->old_presence, &tmp); + int ret = clear_page_presence(f, false); WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); f->armed = false; -- cgit v0.10.2 From 0f9a623dd6c9b5b4dd00c232f29525bfc7a8ecf2 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Tue, 28 Apr 2009 20:17:51 +0100 Subject: tracing: x86, mmiotrace: only register for die notifier when tracer active Follow up to afcfe024aebd74b0984a41af9a34e009cf5badaf in Linus' tree ("x86: mmiotrace: quieten spurious warning message") Signed-off-by: Stuart Bennett Acked-by: Pekka Paalanen Cc: Steven Rostedt LKML-Reference: <1240946271-7083-5-git-send-email-stuart@freedesktop.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index a769d1a..256ce64 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -311,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + /* + * debug traps without an active context are due to either + * something external causing them (f.e. using a debugger while + * mmio tracing enabled), or erroneous behaviour + */ + pr_warning("kmmio: unexpected debug trap on CPU %d.\n", smp_processor_id()); goto out; } @@ -529,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p) } EXPORT_SYMBOL(unregister_kmmio_probe); -static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, - void *args) +static int +kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) { struct die_args *arg = args; @@ -545,11 +550,23 @@ static struct notifier_block nb_die = { .notifier_call = kmmio_die_notifier }; -static int __init init_kmmio(void) +int kmmio_init(void) { int i; + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) INIT_LIST_HEAD(&kmmio_page_table[i]); + return register_die_notifier(&nb_die); } -fs_initcall(init_kmmio); /* should be before device_initcall() */ + +void kmmio_cleanup(void) +{ + int i; + + unregister_die_notifier(&nb_die); + for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) { + WARN_ONCE(!list_empty(&kmmio_page_table[i]), + KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"); + } +} diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index c9342ed..132772a 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -451,6 +451,7 @@ void enable_mmiotrace(void) if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); + kmmio_init(); enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); @@ -473,6 +474,7 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); + kmmio_cleanup(); pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 3d1b7bd..97491f7 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -30,6 +30,8 @@ extern unsigned int kmmio_count; extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); +extern int kmmio_init(void); +extern void kmmio_cleanup(void); #ifdef CONFIG_MMIOTRACE /* kmmio is active by some kmmio_probes? */ -- cgit v0.10.2 From fd0731944333db6e9e91b6954c6ef95f4b71ab04 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2009 12:56:58 +0200 Subject: x86, vmlinux.lds: fix relocatable symbols __init_begin/_end symbols should be inside sections as well, otherwise the relocatable kernel gets confused when freeing init sections in the wrong place. [ Impact: fix bootup crash ] Cc: Sam Ravnborg Cc: Tim Abbott Cc: Linus Torvalds LKML-Reference: <20090429105056.GA28720@uranus.ravnborg.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 0bdbaa5..4c85b2e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -255,8 +255,8 @@ SECTIONS /* Init code and data - will be freed after init */ . = ALIGN(PAGE_SIZE); - __init_begin = .; /* paired with __init_end */ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + __init_begin = .; /* paired with __init_end */ _sinittext = .; INIT_TEXT _einittext = .; @@ -346,8 +346,11 @@ SECTIONS #endif . = ALIGN(PAGE_SIZE); + /* freed after init ends here */ - __init_end = .; + .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) { + __init_end = .; + } #ifdef CONFIG_X86_64 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { -- cgit v0.10.2 From 30e673b230f9d556eb81ef68a7b1a08c8b3b142c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:47 -0500 Subject: tracing/filters: move preds into event_filter object Create a new event_filter object, and move the pred-related members out of the call and subsystem objects and into the filter object - the details of the filter implementation don't need to be exposed in the call and subsystem in any case, and it will also help make the new parser implementation a little cleaner. [ Impact: refactor trace-filter code to prepare for new features ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905887.6416.119.camel@tropicana> Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 78a9ba2..46a27f2 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -101,8 +101,8 @@ struct ftrace_event_call { int (*show_format)(struct trace_seq *s); int (*define_fields)(void); struct list_head fields; - int n_preds; - struct filter_pred **preds; + int filter_active; + void *filter; void *mod; #ifdef CONFIG_EVENT_PROFILE diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7d55bcf..1fb7d6c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -731,12 +731,16 @@ struct ftrace_event_field { int size; }; +struct event_filter { + int n_preds; + struct filter_pred **preds; +}; + struct event_subsystem { struct list_head list; const char *name; struct dentry *entry; - int n_preds; - struct filter_pred **preds; + void *filter; }; struct filter_pred; @@ -774,7 +778,7 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->n_preds) && !filter_match_preds(call, rec)) { + if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { ring_buffer_discard_commit(buffer, event); return 1; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index be4d3a4..1cd1f37 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -757,8 +757,7 @@ event_subsystem_dir(const char *name, struct dentry *d_events) list_add(&system->list, &event_subsystems); - system->preds = NULL; - system->n_preds = 0; + system->filter = NULL; entry = debugfs_create_file("filter", 0644, system->entry, system, &ftrace_subsystem_filter_fops); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 6541828..1e861ec 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -93,11 +93,12 @@ static int filter_pred_none(struct filter_pred *pred, void *event) /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct ftrace_event_call *call, void *rec) { + struct event_filter *filter = call->filter; int i, matched, and_failed = 0; struct filter_pred *pred; - for (i = 0; i < call->n_preds; i++) { - pred = call->preds[i]; + for (i = 0; i < filter->n_preds; i++) { + pred = filter->preds[i]; if (and_failed && !pred->or) continue; matched = pred->fn(pred, rec); @@ -115,20 +116,20 @@ int filter_match_preds(struct ftrace_event_call *call, void *rec) } EXPORT_SYMBOL_GPL(filter_match_preds); -static void __filter_print_preds(struct filter_pred **preds, int n_preds, +static void __filter_print_preds(struct event_filter *filter, struct trace_seq *s) { - char *field_name; struct filter_pred *pred; + char *field_name; int i; - if (!n_preds) { + if (!filter || !filter->n_preds) { trace_seq_printf(s, "none\n"); return; } - for (i = 0; i < n_preds; i++) { - pred = preds[i]; + for (i = 0; i < filter->n_preds; i++) { + pred = filter->preds[i]; field_name = pred->field_name; if (i) trace_seq_printf(s, pred->or ? "|| " : "&& "); @@ -144,7 +145,7 @@ static void __filter_print_preds(struct filter_pred **preds, int n_preds, void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s) { mutex_lock(&filter_mutex); - __filter_print_preds(call->preds, call->n_preds, s); + __filter_print_preds(call->filter, s); mutex_unlock(&filter_mutex); } @@ -152,7 +153,7 @@ void filter_print_subsystem_preds(struct event_subsystem *system, struct trace_seq *s) { mutex_lock(&filter_mutex); - __filter_print_preds(system->preds, system->n_preds, s); + __filter_print_preds(system->filter, s); mutex_unlock(&filter_mutex); } @@ -200,12 +201,14 @@ static int filter_set_pred(struct filter_pred *dest, static void __filter_disable_preds(struct ftrace_event_call *call) { + struct event_filter *filter = call->filter; int i; - call->n_preds = 0; + call->filter_active = 0; + filter->n_preds = 0; for (i = 0; i < MAX_FILTER_PRED; i++) - call->preds[i]->fn = filter_pred_none; + filter->preds[i]->fn = filter_pred_none; } void filter_disable_preds(struct ftrace_event_call *call) @@ -217,32 +220,39 @@ void filter_disable_preds(struct ftrace_event_call *call) int init_preds(struct ftrace_event_call *call) { + struct event_filter *filter; struct filter_pred *pred; int i; - call->n_preds = 0; - - call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!call->preds) + filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!call->filter) return -ENOMEM; + call->filter_active = 0; + filter->n_preds = 0; + + filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); + if (!filter->preds) + goto oom; + for (i = 0; i < MAX_FILTER_PRED; i++) { pred = kzalloc(sizeof(*pred), GFP_KERNEL); if (!pred) goto oom; pred->fn = filter_pred_none; - call->preds[i] = pred; + filter->preds[i] = pred; } return 0; oom: for (i = 0; i < MAX_FILTER_PRED; i++) { - if (call->preds[i]) - filter_free_pred(call->preds[i]); + if (filter->preds[i]) + filter_free_pred(filter->preds[i]); } - kfree(call->preds); - call->preds = NULL; + kfree(filter->preds); + kfree(call->filter); + call->filter = NULL; return -ENOMEM; } @@ -250,15 +260,16 @@ EXPORT_SYMBOL_GPL(init_preds); static void __filter_free_subsystem_preds(struct event_subsystem *system) { + struct event_filter *filter = system->filter; struct ftrace_event_call *call; int i; - if (system->n_preds) { - for (i = 0; i < system->n_preds; i++) - filter_free_pred(system->preds[i]); - kfree(system->preds); - system->preds = NULL; - system->n_preds = 0; + if (filter && filter->n_preds) { + for (i = 0; i < filter->n_preds; i++) + filter_free_pred(filter->preds[i]); + kfree(filter->preds); + kfree(filter); + system->filter = NULL; } list_for_each_entry(call, &ftrace_events, list) { @@ -281,21 +292,23 @@ static int filter_add_pred_fn(struct ftrace_event_call *call, struct filter_pred *pred, filter_pred_fn_t fn) { + struct event_filter *filter = call->filter; int idx, err; - if (call->n_preds && !pred->compound) + if (filter->n_preds && !pred->compound) __filter_disable_preds(call); - if (call->n_preds == MAX_FILTER_PRED) + if (filter->n_preds == MAX_FILTER_PRED) return -ENOSPC; - idx = call->n_preds; - filter_clear_pred(call->preds[idx]); - err = filter_set_pred(call->preds[idx], pred, fn); + idx = filter->n_preds; + filter_clear_pred(filter->preds[idx]); + err = filter_set_pred(filter->preds[idx], pred, fn); if (err) return err; - call->n_preds++; + filter->n_preds++; + call->filter_active = 1; return 0; } @@ -366,29 +379,41 @@ int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) int filter_add_subsystem_pred(struct event_subsystem *system, struct filter_pred *pred) { + struct event_filter *filter = system->filter; struct ftrace_event_call *call; mutex_lock(&filter_mutex); - if (system->n_preds && !pred->compound) + if (filter && filter->n_preds && !pred->compound) { __filter_free_subsystem_preds(system); + filter = NULL; + } - if (!system->n_preds) { - system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), + if (!filter) { + system->filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!system->filter) { + mutex_unlock(&filter_mutex); + return -ENOMEM; + } + filter = system->filter; + filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!system->preds) { + + if (!filter->preds) { + kfree(system->filter); + system->filter = NULL; mutex_unlock(&filter_mutex); return -ENOMEM; } } - if (system->n_preds == MAX_FILTER_PRED) { + if (filter->n_preds == MAX_FILTER_PRED) { mutex_unlock(&filter_mutex); return -ENOSPC; } - system->preds[system->n_preds] = pred; - system->n_preds++; + filter->preds[filter->n_preds] = pred; + filter->n_preds++; list_for_each_entry(call, &ftrace_events, list) { int err; @@ -401,8 +426,8 @@ int filter_add_subsystem_pred(struct event_subsystem *system, err = __filter_add_pred(call, pred); if (err == -ENOMEM) { - system->preds[system->n_preds] = NULL; - system->n_preds--; + filter->preds[filter->n_preds] = NULL; + filter->n_preds--; mutex_unlock(&filter_mutex); return err; } -- cgit v0.10.2 From a118e4d1402f1349fe3d953493e4168a300a752d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:53 -0500 Subject: tracing/filters: distinguish between signed and unsigned fields The new filter comparison ops need to be able to distinguish between signed and unsigned field types, so add an is_signed flag/param to the event field struct/trace_define_fields(). Also define a simple macro, is_signed_type() to determine the signedness at compile time, used in the trace macros. If the is_signed_type() macro won't work with a specific type, a new slightly modified version of TRACE_FIELD() called TRACE_FIELD_SIGN(), allows the signedness to be set explicitly. [ Impact: extend trace-filter code for new feature ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905893.6416.120.camel@tropicana> Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 46a27f2..e61a740 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -122,8 +122,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, struct ring_buffer_event *event); extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size); + char *name, int offset, int size, int is_signed); +#define is_signed_type(type) (((type)(-1)) < 0) /* * The double __builtin_constant_p is because gcc will give us an error @@ -144,10 +145,10 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#define __common_field(type, item) \ +#define __common_field(type, item, is_signed) \ ret = trace_define_field(event_call, #type, "common_" #item, \ offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item)); \ + sizeof(field.ent.item), is_signed); \ if (ret) \ return ret; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1e68114..edb02bc 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -225,7 +225,7 @@ ftrace_format_##call(struct trace_seq *s) \ #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (ret) \ return ret; @@ -234,7 +234,7 @@ ftrace_format_##call(struct trace_seq *s) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), 0); \ if (ret) \ return ret; @@ -242,7 +242,7 @@ ftrace_format_##call(struct trace_seq *s) \ #define __string(item, src) \ ret = trace_define_field(event_call, "__str_loc", #item, \ offsetof(typeof(field), __str_loc_##item), \ - sizeof(field.__str_loc_##item)); + sizeof(field.__str_loc_##item), 0); #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ @@ -253,11 +253,11 @@ ftrace_define_fields_##call(void) \ struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ - __common_field(int, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ + __common_field(int, type, 1); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ \ tstruct; \ \ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 1fb7d6c..866d010 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -729,6 +729,7 @@ struct ftrace_event_field { char *type; int offset; int size; + int is_signed; }; struct event_filter { diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index cfcecc4..5e32e37 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -141,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, TRACE_STRUCT( - TRACE_FIELD(ktime_t, state_data.stamp, stamp) - TRACE_FIELD(ktime_t, state_data.end, end) + TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1) + TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1) TRACE_FIELD(int, state_data.type, type) TRACE_FIELD(int, state_data.state, state) ), diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1cd1f37..bbbea74 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -26,7 +26,7 @@ static DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size) + char *name, int offset, int size, int is_signed) { struct ftrace_event_field *field; @@ -44,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type, field->offset = offset; field->size = size; + field->is_signed = is_signed; list_add(&field->link, &call->fields); return 0; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 0cb1a14..d06cf89 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -50,6 +50,9 @@ extern void __bad_type_size(void); if (!ret) \ return 0; +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + TRACE_FIELD(type, item, assign) #undef TP_RAW_FMT #define TP_RAW_FMT(args...) args @@ -98,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + TRACE_FIELD(type, item, assign) + #undef TP_CMD #define TP_CMD(cmd...) cmd @@ -149,7 +156,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD(type, item, assign) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (ret) \ return ret; @@ -157,7 +164,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), 0); \ + if (ret) \ + return ret; + +#undef TRACE_FIELD_SIGN +#define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed); \ if (ret) \ return ret; @@ -173,11 +188,11 @@ ftrace_define_fields_##call(void) \ struct args field; \ int ret; \ \ - __common_field(unsigned char, type); \ - __common_field(unsigned char, flags); \ - __common_field(unsigned char, preempt_count); \ - __common_field(int, pid); \ - __common_field(int, tgid); \ + __common_field(unsigned char, type, 0); \ + __common_field(unsigned char, flags, 0); \ + __common_field(unsigned char, preempt_count, 0); \ + __common_field(int, pid, 1); \ + __common_field(int, tgid, 1); \ \ tstruct; \ \ -- cgit v0.10.2 From 8b3725621074040d380664964ffbc40610aef8c6 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 28 Apr 2009 03:04:59 -0500 Subject: tracing/filters: a better event parser Replace the current event parser hack with a better one. Filters are no longer specified predicate by predicate, but all at once and can use parens and any of the following operators: numeric fields: ==, !=, <, <=, >, >= string fields: ==, != predicates can be combined with the logical operators: &&, || examples: "common_preempt_count > 4" > filter "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter If there was an error, the erroneous string along with an error message can be seen by looking at the filter e.g.: ((sig >= 10 && sig < 15) || dsig == 17) && comm != bash ^ parse_error: Field not found Currently the caret for an error always appears at the beginning of the filter; a real position should be used, but the error message should be useful even without it. To clear a filter, '0' can be written to the filter file. Filters can also be set or cleared for a complete subsystem by writing the same filter as would be written to an individual event to the filter file at the root of the subsytem. Note however, that if any event in the subsystem lacks a field specified in the filter being set, the set will fail and all filters in the subsytem are automatically cleared. This change from the previous version was made because using only the fields that happen to exist for a given event would most likely result in a meaningless filter. Because the logical operators are now implemented as predicates, the maximum number of predicates in a filter was increased from 8 to 16. [ Impact: add new, extended trace-filter implementation ] Signed-off-by: Tom Zanussi Acked-by: Steven Rostedt Cc: fweisbec@gmail.com Cc: Li Zefan LKML-Reference: <1240905899.6416.121.camel@tropicana> Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index e61a740..5fff40c 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,7 +112,7 @@ struct ftrace_event_call { #endif }; -#define MAX_FILTER_PRED 8 +#define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 128 extern int init_preds(struct ftrace_event_call *call); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 866d010..7736fe8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -735,6 +735,7 @@ struct ftrace_event_field { struct event_filter { int n_preds; struct filter_pred **preds; + char *filter_string; }; struct event_subsystem { @@ -746,7 +747,8 @@ struct event_subsystem { struct filter_pred; -typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); +typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, + int val1, int val2); struct filter_pred { filter_pred_fn_t fn; @@ -756,23 +758,18 @@ struct filter_pred { char *field_name; int offset; int not; - int or; - int compound; - int clear; + int op; + int pop_n; }; -extern void filter_free_pred(struct filter_pred *pred); -extern void filter_print_preds(struct ftrace_event_call *call, +extern void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s); -extern int filter_parse(char **pbuf, struct filter_pred *pred); -extern int filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred); -extern void filter_disable_preds(struct ftrace_event_call *call); -extern void filter_free_subsystem_preds(struct event_subsystem *system); -extern void filter_print_subsystem_preds(struct event_subsystem *system, +extern int apply_event_filter(struct ftrace_event_call *call, + char *filter_string); +extern int apply_subsystem_event_filter(struct event_subsystem *system, + char *filter_string); +extern void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s); -extern int filter_add_subsystem_pred(struct event_subsystem *system, - struct filter_pred *pred); static inline int filter_check_discard(struct ftrace_event_call *call, void *rec, @@ -787,6 +784,47 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, return 0; } +#define DEFINE_COMPARISON_PRED(type) \ +static int filter_pred_##type(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + type *addr = (type *)(event + pred->offset); \ + type val = (type)pred->val; \ + int match = 0; \ + \ + switch (pred->op) { \ + case OP_LT: \ + match = (*addr < val); \ + break; \ + case OP_LE: \ + match = (*addr <= val); \ + break; \ + case OP_GT: \ + match = (*addr > val); \ + break; \ + case OP_GE: \ + match = (*addr >= val); \ + break; \ + default: \ + break; \ + } \ + \ + return match; \ +} + +#define DEFINE_EQUALITY_PRED(size) \ +static int filter_pred_##size(struct filter_pred *pred, void *event, \ + int val1, int val2) \ +{ \ + u##size *addr = (u##size *)(event + pred->offset); \ + u##size val = (u##size)pred->val; \ + int match; \ + \ + match = (val == *addr) ^ pred->not; \ + \ + return match; \ +} + extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index bbbea74..f789ca5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -492,7 +492,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_preds(call, s); + print_event_filter(call, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -505,40 +505,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_call *call = filp->private_data; - char buf[64], *pbuf = buf; - struct filter_pred *pred; + char *buf; int err; - if (cnt >= sizeof(buf)) + if (cnt >= PAGE_SIZE) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - buf[cnt] = '\0'; - - pred = kzalloc(sizeof(*pred), GFP_KERNEL); - if (!pred) + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return -ENOMEM; - err = filter_parse(&pbuf, pred); - if (err < 0) { - filter_free_pred(pred); - return err; - } - - if (pred->clear) { - filter_disable_preds(call); - filter_free_pred(pred); - return cnt; + if (copy_from_user(buf, ubuf, cnt)) { + free_page((unsigned long) buf); + return -EFAULT; } + buf[cnt] = '\0'; - err = filter_add_pred(call, pred); - if (err < 0) { - filter_free_pred(pred); + err = apply_event_filter(call, buf); + free_page((unsigned long) buf); + if (err < 0) return err; - } - - filter_free_pred(pred); *ppos += cnt; @@ -562,7 +548,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - filter_print_subsystem_preds(system, s); + print_subsystem_event_filter(system, s); r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); kfree(s); @@ -575,38 +561,26 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { struct event_subsystem *system = filp->private_data; - char buf[64], *pbuf = buf; - struct filter_pred *pred; + char *buf; int err; - if (cnt >= sizeof(buf)) + if (cnt >= PAGE_SIZE) return -EINVAL; - if (copy_from_user(&buf, ubuf, cnt)) - return -EFAULT; - buf[cnt] = '\0'; - - pred = kzalloc(sizeof(*pred), GFP_KERNEL); - if (!pred) + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return -ENOMEM; - err = filter_parse(&pbuf, pred); - if (err < 0) { - filter_free_pred(pred); - return err; - } - - if (pred->clear) { - filter_free_subsystem_preds(system); - filter_free_pred(pred); - return cnt; + if (copy_from_user(buf, ubuf, cnt)) { + free_page((unsigned long) buf); + return -EFAULT; } + buf[cnt] = '\0'; - err = filter_add_subsystem_pred(system, pred); - if (err < 0) { - filter_free_pred(pred); + err = apply_subsystem_event_filter(system, buf); + free_page((unsigned long) buf); + if (err < 0) return err; - } *ppos += cnt; @@ -760,11 +734,21 @@ event_subsystem_dir(const char *name, struct dentry *d_events) system->filter = NULL; + system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL); + if (!system->filter) { + pr_warning("Could not allocate filter for subsystem " + "'%s'\n", name); + return system->entry; + } + entry = debugfs_create_file("filter", 0644, system->entry, system, &ftrace_subsystem_filter_fops); - if (!entry) + if (!entry) { + kfree(system->filter); + system->filter = NULL; pr_warning("Could not create debugfs " "'%s/filter' entry\n", name); + } return system->entry; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1e861ec..f494866 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -29,51 +29,130 @@ static DEFINE_MUTEX(filter_mutex); -static int filter_pred_64(struct filter_pred *pred, void *event) +enum filter_op_ids { - u64 *addr = (u64 *)(event + pred->offset); - u64 val = (u64)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; -} - -static int filter_pred_32(struct filter_pred *pred, void *event) -{ - u32 *addr = (u32 *)(event + pred->offset); - u32 val = (u32)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; -} - -static int filter_pred_16(struct filter_pred *pred, void *event) + OP_OR, + OP_AND, + OP_NE, + OP_EQ, + OP_LT, + OP_LE, + OP_GT, + OP_GE, + OP_NONE, + OP_OPEN_PAREN, +}; + +struct filter_op { + int id; + char *string; + int precedence; +}; + +static struct filter_op filter_ops[] = { + { OP_OR, "||", 1 }, + { OP_AND, "&&", 2 }, + { OP_NE, "!=", 4 }, + { OP_EQ, "==", 4 }, + { OP_LT, "<", 5 }, + { OP_LE, "<=", 5 }, + { OP_GT, ">", 5 }, + { OP_GE, ">=", 5 }, + { OP_NONE, "OP_NONE", 0 }, + { OP_OPEN_PAREN, "(", 0 }, +}; + +enum { + FILT_ERR_NONE, + FILT_ERR_INVALID_OP, + FILT_ERR_UNBALANCED_PAREN, + FILT_ERR_TOO_MANY_OPERANDS, + FILT_ERR_OPERAND_TOO_LONG, + FILT_ERR_FIELD_NOT_FOUND, + FILT_ERR_ILLEGAL_FIELD_OP, + FILT_ERR_ILLEGAL_INTVAL, + FILT_ERR_BAD_SUBSYS_FILTER, + FILT_ERR_TOO_MANY_PREDS, + FILT_ERR_MISSING_FIELD, + FILT_ERR_INVALID_FILTER, +}; + +static char *err_text[] = { + "No error", + "Invalid operator", + "Unbalanced parens", + "Too many operands", + "Operand too long", + "Field not found", + "Illegal operation for field type", + "Illegal integer value", + "Couldn't find or set field in one of a subsystem's events", + "Too many terms in predicate expression", + "Missing field name and/or value", + "Meaningless filter expression", +}; + +struct opstack_op { + int op; + struct list_head list; +}; + +struct postfix_elt { + int op; + char *operand; + struct list_head list; +}; + +struct filter_parse_state { + struct filter_op *ops; + struct list_head opstack; + struct list_head postfix; + int lasterr; + int lasterr_pos; + + struct { + char *string; + unsigned int cnt; + unsigned int tail; + } infix; + + struct { + char string[MAX_FILTER_STR_VAL]; + int pos; + unsigned int tail; + } operand; +}; + +DEFINE_COMPARISON_PRED(s64); +DEFINE_COMPARISON_PRED(u64); +DEFINE_COMPARISON_PRED(s32); +DEFINE_COMPARISON_PRED(u32); +DEFINE_COMPARISON_PRED(s16); +DEFINE_COMPARISON_PRED(u16); +DEFINE_COMPARISON_PRED(s8); +DEFINE_COMPARISON_PRED(u8); + +DEFINE_EQUALITY_PRED(64); +DEFINE_EQUALITY_PRED(32); +DEFINE_EQUALITY_PRED(16); +DEFINE_EQUALITY_PRED(8); + +static int filter_pred_and(struct filter_pred *pred __attribute((unused)), + void *event __attribute((unused)), + int val1, int val2) { - u16 *addr = (u16 *)(event + pred->offset); - u16 val = (u16)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; + return val1 && val2; } -static int filter_pred_8(struct filter_pred *pred, void *event) +static int filter_pred_or(struct filter_pred *pred __attribute((unused)), + void *event __attribute((unused)), + int val1, int val2) { - u8 *addr = (u8 *)(event + pred->offset); - u8 val = (u8)pred->val; - int match; - - match = (val == *addr) ^ pred->not; - - return match; + return val1 || val2; } -static int filter_pred_string(struct filter_pred *pred, void *event) +static int filter_pred_string(struct filter_pred *pred, void *event, + int val1, int val2) { char *addr = (char *)(event + pred->offset); int cmp, match; @@ -85,7 +164,8 @@ static int filter_pred_string(struct filter_pred *pred, void *event) return match; } -static int filter_pred_none(struct filter_pred *pred, void *event) +static int filter_pred_none(struct filter_pred *pred, void *event, + int val1, int val2) { return 0; } @@ -94,66 +174,119 @@ static int filter_pred_none(struct filter_pred *pred, void *event) int filter_match_preds(struct ftrace_event_call *call, void *rec) { struct event_filter *filter = call->filter; - int i, matched, and_failed = 0; + int match, top = 0, val1 = 0, val2 = 0; + int stack[MAX_FILTER_PRED]; struct filter_pred *pred; + int i; for (i = 0; i < filter->n_preds; i++) { pred = filter->preds[i]; - if (and_failed && !pred->or) + if (!pred->pop_n) { + match = pred->fn(pred, rec, val1, val2); + stack[top++] = match; continue; - matched = pred->fn(pred, rec); - if (!matched && !pred->or) { - and_failed = 1; - continue; - } else if (matched && pred->or) - return 1; + } + if (pred->pop_n > top) { + WARN_ON_ONCE(1); + return 0; + } + val1 = stack[--top]; + val2 = stack[--top]; + match = pred->fn(pred, rec, val1, val2); + stack[top++] = match; } - if (and_failed) - return 0; - - return 1; + return stack[--top]; } EXPORT_SYMBOL_GPL(filter_match_preds); -static void __filter_print_preds(struct event_filter *filter, - struct trace_seq *s) +static void parse_error(struct filter_parse_state *ps, int err, int pos) { - struct filter_pred *pred; - char *field_name; - int i; + ps->lasterr = err; + ps->lasterr_pos = pos; +} - if (!filter || !filter->n_preds) { - trace_seq_printf(s, "none\n"); +static void remove_filter_string(struct event_filter *filter) +{ + kfree(filter->filter_string); + filter->filter_string = NULL; +} + +static int replace_filter_string(struct event_filter *filter, + char *filter_string) +{ + kfree(filter->filter_string); + filter->filter_string = kstrdup(filter_string, GFP_KERNEL); + if (!filter->filter_string) + return -ENOMEM; + + return 0; +} + +static int append_filter_string(struct event_filter *filter, + char *string) +{ + int newlen; + char *new_filter_string; + + BUG_ON(!filter->filter_string); + newlen = strlen(filter->filter_string) + strlen(string) + 1; + new_filter_string = kmalloc(newlen, GFP_KERNEL); + if (!new_filter_string) + return -ENOMEM; + + strcpy(new_filter_string, filter->filter_string); + strcat(new_filter_string, string); + kfree(filter->filter_string); + filter->filter_string = new_filter_string; + + return 0; +} + +static void append_filter_err(struct filter_parse_state *ps, + struct event_filter *filter) +{ + int pos = ps->lasterr_pos; + char *buf, *pbuf; + + buf = (char *)__get_free_page(GFP_TEMPORARY); + if (!buf) return; - } - for (i = 0; i < filter->n_preds; i++) { - pred = filter->preds[i]; - field_name = pred->field_name; - if (i) - trace_seq_printf(s, pred->or ? "|| " : "&& "); - trace_seq_printf(s, "%s ", field_name); - trace_seq_printf(s, pred->not ? "!= " : "== "); - if (pred->str_len) - trace_seq_printf(s, "%s\n", pred->str_val); - else - trace_seq_printf(s, "%llu\n", pred->val); - } + append_filter_string(filter, "\n"); + memset(buf, ' ', PAGE_SIZE); + if (pos > PAGE_SIZE - 128) + pos = 0; + buf[pos] = '^'; + pbuf = &buf[pos] + 1; + + sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]); + append_filter_string(filter, buf); + free_page((unsigned long) buf); } -void filter_print_preds(struct ftrace_event_call *call, struct trace_seq *s) +void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) { + struct event_filter *filter = call->filter; + mutex_lock(&filter_mutex); - __filter_print_preds(call->filter, s); + if (filter->filter_string) + trace_seq_printf(s, "%s\n", filter->filter_string); + else + trace_seq_printf(s, "none\n"); mutex_unlock(&filter_mutex); } -void filter_print_subsystem_preds(struct event_subsystem *system, +void print_subsystem_event_filter(struct event_subsystem *system, struct trace_seq *s) { + struct event_filter *filter = system->filter; + mutex_lock(&filter_mutex); - __filter_print_preds(system->filter, s); + if (filter->filter_string) + trace_seq_printf(s, "%s\n", filter->filter_string); + else + trace_seq_printf(s, "none\n"); mutex_unlock(&filter_mutex); } @@ -170,7 +303,7 @@ find_event_field(struct ftrace_event_call *call, char *name) return NULL; } -void filter_free_pred(struct filter_pred *pred) +static void filter_free_pred(struct filter_pred *pred) { if (!pred) return; @@ -191,15 +324,17 @@ static int filter_set_pred(struct filter_pred *dest, filter_pred_fn_t fn) { *dest = *src; - dest->field_name = kstrdup(src->field_name, GFP_KERNEL); - if (!dest->field_name) - return -ENOMEM; + if (src->field_name) { + dest->field_name = kstrdup(src->field_name, GFP_KERNEL); + if (!dest->field_name) + return -ENOMEM; + } dest->fn = fn; return 0; } -static void __filter_disable_preds(struct ftrace_event_call *call) +static void filter_disable_preds(struct ftrace_event_call *call) { struct event_filter *filter = call->filter; int i; @@ -211,13 +346,6 @@ static void __filter_disable_preds(struct ftrace_event_call *call) filter->preds[i]->fn = filter_pred_none; } -void filter_disable_preds(struct ftrace_event_call *call) -{ - mutex_lock(&filter_mutex); - __filter_disable_preds(call); - mutex_unlock(&filter_mutex); -} - int init_preds(struct ftrace_event_call *call) { struct event_filter *filter; @@ -258,48 +386,43 @@ oom: } EXPORT_SYMBOL_GPL(init_preds); -static void __filter_free_subsystem_preds(struct event_subsystem *system) +static void filter_free_subsystem_preds(struct event_subsystem *system) { struct event_filter *filter = system->filter; struct ftrace_event_call *call; int i; - if (filter && filter->n_preds) { + if (filter->n_preds) { for (i = 0; i < filter->n_preds; i++) filter_free_pred(filter->preds[i]); kfree(filter->preds); - kfree(filter); - system->filter = NULL; + filter->preds = NULL; + filter->n_preds = 0; } list_for_each_entry(call, &ftrace_events, list) { if (!call->define_fields) continue; - if (!strcmp(call->system, system->name)) - __filter_disable_preds(call); + if (!strcmp(call->system, system->name)) { + filter_disable_preds(call); + remove_filter_string(call->filter); + } } } -void filter_free_subsystem_preds(struct event_subsystem *system) -{ - mutex_lock(&filter_mutex); - __filter_free_subsystem_preds(system); - mutex_unlock(&filter_mutex); -} - -static int filter_add_pred_fn(struct ftrace_event_call *call, +static int filter_add_pred_fn(struct filter_parse_state *ps, + struct ftrace_event_call *call, struct filter_pred *pred, filter_pred_fn_t fn) { struct event_filter *filter = call->filter; int idx, err; - if (filter->n_preds && !pred->compound) - __filter_disable_preds(call); - - if (filter->n_preds == MAX_FILTER_PRED) + if (filter->n_preds == MAX_FILTER_PRED) { + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); return -ENOSPC; + } idx = filter->n_preds; filter_clear_pred(filter->preds[idx]); @@ -321,94 +444,132 @@ static int is_string_field(const char *type) return 0; } -static int __filter_add_pred(struct ftrace_event_call *call, - struct filter_pred *pred) +static int is_legal_op(struct ftrace_event_field *field, int op) +{ + if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE)) + return 0; + + return 1; +} + +static filter_pred_fn_t select_comparison_fn(int op, int field_size, + int field_is_signed) +{ + filter_pred_fn_t fn = NULL; + + switch (field_size) { + case 8: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_64; + else if (field_is_signed) + fn = filter_pred_s64; + else + fn = filter_pred_u64; + break; + case 4: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_32; + else if (field_is_signed) + fn = filter_pred_s32; + else + fn = filter_pred_u32; + break; + case 2: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_16; + else if (field_is_signed) + fn = filter_pred_s16; + else + fn = filter_pred_u16; + break; + case 1: + if (op == OP_EQ || op == OP_NE) + fn = filter_pred_8; + else if (field_is_signed) + fn = filter_pred_s8; + else + fn = filter_pred_u8; + break; + } + + return fn; +} + +static int filter_add_pred(struct filter_parse_state *ps, + struct ftrace_event_call *call, + struct filter_pred *pred) { struct ftrace_event_field *field; filter_pred_fn_t fn; unsigned long long val; + pred->fn = filter_pred_none; + + if (pred->op == OP_AND) { + pred->pop_n = 2; + return filter_add_pred_fn(ps, call, pred, filter_pred_and); + } else if (pred->op == OP_OR) { + pred->pop_n = 2; + return filter_add_pred_fn(ps, call, pred, filter_pred_or); + } + field = find_event_field(call, pred->field_name); - if (!field) + if (!field) { + parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0); return -EINVAL; + } - pred->fn = filter_pred_none; pred->offset = field->offset; + if (!is_legal_op(field, pred->op)) { + parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0); + return -EINVAL; + } + if (is_string_field(field->type)) { fn = filter_pred_string; pred->str_len = field->size; - return filter_add_pred_fn(call, pred, fn); + if (pred->op == OP_NE) + pred->not = 1; + return filter_add_pred_fn(ps, call, pred, fn); } else { - if (strict_strtoull(pred->str_val, 0, &val)) + if (strict_strtoull(pred->str_val, 0, &val)) { + parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); return -EINVAL; + } pred->val = val; } - switch (field->size) { - case 8: - fn = filter_pred_64; - break; - case 4: - fn = filter_pred_32; - break; - case 2: - fn = filter_pred_16; - break; - case 1: - fn = filter_pred_8; - break; - default: + fn = select_comparison_fn(pred->op, field->size, field->is_signed); + if (!fn) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); return -EINVAL; } - return filter_add_pred_fn(call, pred, fn); -} - -int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) -{ - int err; - - mutex_lock(&filter_mutex); - err = __filter_add_pred(call, pred); - mutex_unlock(&filter_mutex); + if (pred->op == OP_NE) + pred->not = 1; - return err; + return filter_add_pred_fn(ps, call, pred, fn); } -int filter_add_subsystem_pred(struct event_subsystem *system, - struct filter_pred *pred) +static int filter_add_subsystem_pred(struct filter_parse_state *ps, + struct event_subsystem *system, + struct filter_pred *pred, + char *filter_string) { struct event_filter *filter = system->filter; struct ftrace_event_call *call; - mutex_lock(&filter_mutex); - - if (filter && filter->n_preds && !pred->compound) { - __filter_free_subsystem_preds(system); - filter = NULL; - } - - if (!filter) { - system->filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!system->filter) { - mutex_unlock(&filter_mutex); - return -ENOMEM; - } - filter = system->filter; + if (!filter->preds) { filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); - if (!filter->preds) { - kfree(system->filter); - system->filter = NULL; - mutex_unlock(&filter_mutex); + if (!filter->preds) return -ENOMEM; - } } if (filter->n_preds == MAX_FILTER_PRED) { - mutex_unlock(&filter_mutex); + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); return -ENOSPC; } @@ -424,97 +585,508 @@ int filter_add_subsystem_pred(struct event_subsystem *system, if (strcmp(call->system, system->name)) continue; - err = __filter_add_pred(call, pred); - if (err == -ENOMEM) { - filter->preds[filter->n_preds] = NULL; - filter->n_preds--; - mutex_unlock(&filter_mutex); + err = filter_add_pred(ps, call, pred); + if (err) { + filter_free_subsystem_preds(system); + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); return err; } + replace_filter_string(call->filter, filter_string); } - mutex_unlock(&filter_mutex); + return 0; +} + +static void parse_init(struct filter_parse_state *ps, + struct filter_op *ops, + char *infix_string) +{ + memset(ps, '\0', sizeof(*ps)); + + ps->infix.string = infix_string; + ps->infix.cnt = strlen(infix_string); + ps->ops = ops; + + INIT_LIST_HEAD(&ps->opstack); + INIT_LIST_HEAD(&ps->postfix); +} + +static char infix_next(struct filter_parse_state *ps) +{ + ps->infix.cnt--; + + return ps->infix.string[ps->infix.tail++]; +} + +static char infix_peek(struct filter_parse_state *ps) +{ + if (ps->infix.tail == strlen(ps->infix.string)) + return 0; + + return ps->infix.string[ps->infix.tail]; +} + +static void infix_advance(struct filter_parse_state *ps) +{ + ps->infix.cnt--; + ps->infix.tail++; +} + +static inline int is_precedence_lower(struct filter_parse_state *ps, + int a, int b) +{ + return ps->ops[a].precedence < ps->ops[b].precedence; +} + +static inline int is_op_char(struct filter_parse_state *ps, char c) +{ + int i; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (ps->ops[i].string[0] == c) + return 1; + } return 0; } -/* - * The filter format can be - * - 0, which means remove all filter preds - * - [||/&&] ==/!= - */ -int filter_parse(char **pbuf, struct filter_pred *pred) -{ - char *tok, *val_str = NULL; - int tok_n = 0; - - while ((tok = strsep(pbuf, " \n"))) { - if (tok_n == 0) { - if (!strcmp(tok, "0")) { - pred->clear = 1; - return 0; - } else if (!strcmp(tok, "&&")) { - pred->or = 0; - pred->compound = 1; - } else if (!strcmp(tok, "||")) { - pred->or = 1; - pred->compound = 1; - } else - pred->field_name = tok; - tok_n = 1; - continue; +static int infix_get_op(struct filter_parse_state *ps, char firstc) +{ + char nextc = infix_peek(ps); + char opstr[3]; + int i; + + opstr[0] = firstc; + opstr[1] = nextc; + opstr[2] = '\0'; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (!strcmp(opstr, ps->ops[i].string)) { + infix_advance(ps); + return ps->ops[i].id; } - if (tok_n == 1) { - if (!pred->field_name) - pred->field_name = tok; - else if (!strcmp(tok, "!=")) - pred->not = 1; - else if (!strcmp(tok, "==")) - pred->not = 0; - else { - pred->field_name = NULL; + } + + opstr[1] = '\0'; + + for (i = 0; strcmp(ps->ops[i].string, "OP_NONE"); i++) { + if (!strcmp(opstr, ps->ops[i].string)) + return ps->ops[i].id; + } + + return OP_NONE; +} + +static inline void clear_operand_string(struct filter_parse_state *ps) +{ + memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL); + ps->operand.tail = 0; +} + +static inline int append_operand_char(struct filter_parse_state *ps, char c) +{ + if (ps->operand.tail == MAX_FILTER_STR_VAL) + return -EINVAL; + + ps->operand.string[ps->operand.tail++] = c; + + return 0; +} + +static int filter_opstack_push(struct filter_parse_state *ps, int op) +{ + struct opstack_op *opstack_op; + + opstack_op = kmalloc(sizeof(*opstack_op), GFP_KERNEL); + if (!opstack_op) + return -ENOMEM; + + opstack_op->op = op; + list_add(&opstack_op->list, &ps->opstack); + + return 0; +} + +static int filter_opstack_empty(struct filter_parse_state *ps) +{ + return list_empty(&ps->opstack); +} + +static int filter_opstack_top(struct filter_parse_state *ps) +{ + struct opstack_op *opstack_op; + + if (filter_opstack_empty(ps)) + return OP_NONE; + + opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); + + return opstack_op->op; +} + +static int filter_opstack_pop(struct filter_parse_state *ps) +{ + struct opstack_op *opstack_op; + int op; + + if (filter_opstack_empty(ps)) + return OP_NONE; + + opstack_op = list_first_entry(&ps->opstack, struct opstack_op, list); + op = opstack_op->op; + list_del(&opstack_op->list); + + kfree(opstack_op); + + return op; +} + +static void filter_opstack_clear(struct filter_parse_state *ps) +{ + while (!filter_opstack_empty(ps)) + filter_opstack_pop(ps); +} + +static char *curr_operand(struct filter_parse_state *ps) +{ + return ps->operand.string; +} + +static int postfix_append_operand(struct filter_parse_state *ps, char *operand) +{ + struct postfix_elt *elt; + + elt = kmalloc(sizeof(*elt), GFP_KERNEL); + if (!elt) + return -ENOMEM; + + elt->op = OP_NONE; + elt->operand = kstrdup(operand, GFP_KERNEL); + if (!elt->operand) { + kfree(elt); + return -ENOMEM; + } + + list_add_tail(&elt->list, &ps->postfix); + + return 0; +} + +static int postfix_append_op(struct filter_parse_state *ps, int op) +{ + struct postfix_elt *elt; + + elt = kmalloc(sizeof(*elt), GFP_KERNEL); + if (!elt) + return -ENOMEM; + + elt->op = op; + elt->operand = NULL; + + list_add_tail(&elt->list, &ps->postfix); + + return 0; +} + +static void postfix_clear(struct filter_parse_state *ps) +{ + struct postfix_elt *elt; + + while (!list_empty(&ps->postfix)) { + elt = list_first_entry(&ps->postfix, struct postfix_elt, list); + kfree(elt->operand); + list_del(&elt->list); + } +} + +static int filter_parse(struct filter_parse_state *ps) +{ + int op, top_op; + char ch; + + while ((ch = infix_next(ps))) { + if (isspace(ch)) + continue; + + if (is_op_char(ps, ch)) { + op = infix_get_op(ps, ch); + if (op == OP_NONE) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); return -EINVAL; } - tok_n = 2; + + if (strlen(curr_operand(ps))) { + postfix_append_operand(ps, curr_operand(ps)); + clear_operand_string(ps); + } + + while (!filter_opstack_empty(ps)) { + top_op = filter_opstack_top(ps); + if (!is_precedence_lower(ps, top_op, op)) { + top_op = filter_opstack_pop(ps); + postfix_append_op(ps, top_op); + continue; + } + break; + } + + filter_opstack_push(ps, op); continue; } - if (tok_n == 2) { - if (pred->compound) { - if (!strcmp(tok, "!=")) - pred->not = 1; - else if (!strcmp(tok, "==")) - pred->not = 0; - else { - pred->field_name = NULL; - return -EINVAL; - } - } else { - val_str = tok; - break; /* done */ + + if (ch == '(') { + filter_opstack_push(ps, OP_OPEN_PAREN); + continue; + } + + if (ch == ')') { + if (strlen(curr_operand(ps))) { + postfix_append_operand(ps, curr_operand(ps)); + clear_operand_string(ps); + } + + top_op = filter_opstack_pop(ps); + while (top_op != OP_NONE) { + if (top_op == OP_OPEN_PAREN) + break; + postfix_append_op(ps, top_op); + top_op = filter_opstack_pop(ps); + } + if (top_op == OP_NONE) { + parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); + return -EINVAL; } - tok_n = 3; continue; } - if (tok_n == 3) { - val_str = tok; - break; /* done */ + if (append_operand_char(ps, ch)) { + parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0); + return -EINVAL; + } + } + + if (strlen(curr_operand(ps))) + postfix_append_operand(ps, curr_operand(ps)); + + while (!filter_opstack_empty(ps)) { + top_op = filter_opstack_pop(ps); + if (top_op == OP_NONE) + break; + if (top_op == OP_OPEN_PAREN) { + parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0); + return -EINVAL; + } + postfix_append_op(ps, top_op); + } + + return 0; +} + +static struct filter_pred *create_pred(int op, char *operand1, char *operand2) +{ + struct filter_pred *pred; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return NULL; + + pred->field_name = kstrdup(operand1, GFP_KERNEL); + if (!pred->field_name) { + kfree(pred); + return NULL; + } + + strcpy(pred->str_val, operand2); + pred->str_len = strlen(operand2); + + pred->op = op; + + return pred; +} + +static struct filter_pred *create_logical_pred(int op) +{ + struct filter_pred *pred; + + pred = kzalloc(sizeof(*pred), GFP_KERNEL); + if (!pred) + return NULL; + + pred->op = op; + + return pred; +} + +static int check_preds(struct filter_parse_state *ps) +{ + int n_normal_preds = 0, n_logical_preds = 0; + struct postfix_elt *elt; + + list_for_each_entry(elt, &ps->postfix, list) { + if (elt->op == OP_NONE) + continue; + + if (elt->op == OP_AND || elt->op == OP_OR) { + n_logical_preds++; + continue; } + n_normal_preds++; } - if (!val_str || !strlen(val_str) - || strlen(val_str) >= MAX_FILTER_STR_VAL) { - pred->field_name = NULL; + if (!n_normal_preds || n_logical_preds >= n_normal_preds) { + parse_error(ps, FILT_ERR_INVALID_FILTER, 0); return -EINVAL; } - strcpy(pred->str_val, val_str); - pred->str_len = strlen(val_str); + return 0; +} - pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); - if (!pred->field_name) - return -ENOMEM; +static int replace_preds(struct event_subsystem *system, + struct ftrace_event_call *call, + struct filter_parse_state *ps, + char *filter_string) +{ + char *operand1 = NULL, *operand2 = NULL; + struct filter_pred *pred; + struct postfix_elt *elt; + int err; + + err = check_preds(ps); + if (err) + return err; + + list_for_each_entry(elt, &ps->postfix, list) { + if (elt->op == OP_NONE) { + if (!operand1) + operand1 = elt->operand; + else if (!operand2) + operand2 = elt->operand; + else { + parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0); + return -EINVAL; + } + continue; + } + + if (elt->op == OP_AND || elt->op == OP_OR) { + pred = create_logical_pred(elt->op); + if (call) { + err = filter_add_pred(ps, call, pred); + filter_free_pred(pred); + } else + err = filter_add_subsystem_pred(ps, system, + pred, filter_string); + if (err) + return err; + + operand1 = operand2 = NULL; + continue; + } + + if (!operand1 || !operand2) { + parse_error(ps, FILT_ERR_MISSING_FIELD, 0); + return -EINVAL; + } + + pred = create_pred(elt->op, operand1, operand2); + if (call) { + err = filter_add_pred(ps, call, pred); + filter_free_pred(pred); + } else + err = filter_add_subsystem_pred(ps, system, pred, + filter_string); + if (err) + return err; + + operand1 = operand2 = NULL; + } return 0; } +int apply_event_filter(struct ftrace_event_call *call, char *filter_string) +{ + int err; + + struct filter_parse_state *ps; + + mutex_lock(&filter_mutex); + + if (!strcmp(strstrip(filter_string), "0")) { + filter_disable_preds(call); + remove_filter_string(call->filter); + mutex_unlock(&filter_mutex); + return 0; + } + + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + return -ENOMEM; + + filter_disable_preds(call); + replace_filter_string(call->filter, filter_string); + + parse_init(ps, filter_ops, filter_string); + err = filter_parse(ps); + if (err) { + append_filter_err(ps, call->filter); + goto out; + } + + err = replace_preds(NULL, call, ps, filter_string); + if (err) + append_filter_err(ps, call->filter); + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); + + mutex_unlock(&filter_mutex); + + return err; +} + +int apply_subsystem_event_filter(struct event_subsystem *system, + char *filter_string) +{ + int err; + + struct filter_parse_state *ps; + + mutex_lock(&filter_mutex); + + if (!strcmp(strstrip(filter_string), "0")) { + filter_free_subsystem_preds(system); + remove_filter_string(system->filter); + mutex_unlock(&filter_mutex); + return 0; + } + + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + return -ENOMEM; + + filter_free_subsystem_preds(system); + replace_filter_string(system->filter, filter_string); + + parse_init(ps, filter_ops, filter_string); + err = filter_parse(ps); + if (err) { + append_filter_err(ps, system->filter); + goto out; + } + + err = replace_preds(system, NULL, ps, filter_string); + if (err) + append_filter_err(ps, system->filter); + +out: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); + + mutex_unlock(&filter_mutex); + + return err; +} -- cgit v0.10.2 From a0e39ed378fb6ba916522764cd508fa7d42ad495 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 29 Apr 2009 13:51:39 +0200 Subject: tracing: fix build failure on s390 "tracing: create automated trace defines" causes this compile error on s390, as reported by Sachin Sant against linux-next: kernel/built-in.o: In function `__do_softirq': (.text+0x1c680): undefined reference to `__tracepoint_softirq_entry' This happens because the definitions of the softirq tracepoints were moved from kernel/softirq.c to kernel/irq/handle.c. Since s390 doesn't support generic hardirqs handle.c doesn't get compiled and the definitions are missing. So move the tracepoints to softirq.c again. [ Impact: fix build failure on s390 ] Reported-by: Sachin Sant Signed-off-by: Heiko Carstens Cc: Steven Rostedt Cc: fweisbec@gmail.com LKML-Reference: <20090429135139.5fac79b8@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 37c6363..e68bb5a 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -18,8 +18,6 @@ #include #include #include - -#define CREATE_TRACE_POINTS #include #include "internals.h" diff --git a/kernel/softirq.c b/kernel/softirq.c index 7ab9dfd..d4ba347 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -24,6 +24,8 @@ #include #include #include + +#define CREATE_TRACE_POINTS #include #include -- cgit v0.10.2 From 50fa610a3b6ba7cf91d7a92229177dfaff2b81a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 28 Apr 2009 15:01:38 +0100 Subject: sched: Document memory barriers implied by sleep/wake-up primitives Add a section to the memory barriers document to note the implied memory barriers of sleep primitives (set_current_state() and wrappers) and wake-up primitives (wake_up() and co.). Also extend the in-code comments on the wake_up() functions to note these implied barriers. [ Impact: add documentation ] Signed-off-by: David Howells Cc: Oleg Nesterov Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <20090428140138.1192.94723.stgit@warthog.procyon.org.uk> Signed-off-by: Ingo Molnar diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f5b7127..7f5809e 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -31,6 +31,7 @@ Contents: - Locking functions. - Interrupt disabling functions. + - Sleep and wake-up functions. - Miscellaneous functions. (*) Inter-CPU locking barrier effects. @@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some other means. +SLEEP AND WAKE-UP FUNCTIONS +--------------------------- + +Sleeping and waking on an event flagged in global data can be viewed as an +interaction between two pieces of data: the task state of the task waiting for +the event and the global data used to indicate the event. To make sure that +these appear to happen in the right order, the primitives to begin the process +of going to sleep, and the primitives to initiate a wake up imply certain +barriers. + +Firstly, the sleeper normally follows something like this sequence of events: + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (event_indicated) + break; + schedule(); + } + +A general memory barrier is interpolated automatically by set_current_state() +after it has altered the task state: + + CPU 1 + =============================== + set_current_state(); + set_mb(); + STORE current->state + + LOAD event_indicated + +set_current_state() may be wrapped by: + + prepare_to_wait(); + prepare_to_wait_exclusive(); + +which therefore also imply a general memory barrier after setting the state. +The whole sequence above is available in various canned forms, all of which +interpolate the memory barrier in the right place: + + wait_event(); + wait_event_interruptible(); + wait_event_interruptible_exclusive(); + wait_event_interruptible_timeout(); + wait_event_killable(); + wait_event_timeout(); + wait_on_bit(); + wait_on_bit_lock(); + + +Secondly, code that performs a wake up normally follows something like this: + + event_indicated = 1; + wake_up(&event_wait_queue); + +or: + + event_indicated = 1; + wake_up_process(event_daemon); + +A write memory barrier is implied by wake_up() and co. if and only if they wake +something up. The barrier occurs before the task state is cleared, and so sits +between the STORE to indicate the event and the STORE to set TASK_RUNNING: + + CPU 1 CPU 2 + =============================== =============================== + set_current_state(); STORE event_indicated + set_mb(); wake_up(); + STORE current->state + STORE current->state + LOAD event_indicated + +The available waker functions include: + + complete(); + wake_up(); + wake_up_all(); + wake_up_bit(); + wake_up_interruptible(); + wake_up_interruptible_all(); + wake_up_interruptible_nr(); + wake_up_interruptible_poll(); + wake_up_interruptible_sync(); + wake_up_interruptible_sync_poll(); + wake_up_locked(); + wake_up_locked_poll(); + wake_up_nr(); + wake_up_poll(); + wake_up_process(); + + +[!] Note that the memory barriers implied by the sleeper and the waker do _not_ +order multiple stores before the wake-up with respect to loads of those stored +values after the sleeper has called set_current_state(). For instance, if the +sleeper does: + + set_current_state(TASK_INTERRUPTIBLE); + if (event_indicated) + break; + __set_current_state(TASK_RUNNING); + do_something(my_data); + +and the waker does: + + my_data = value; + event_indicated = 1; + wake_up(&event_wait_queue); + +there's no guarantee that the change to event_indicated will be perceived by +the sleeper as coming after the change to my_data. In such a circumstance, the +code on both sides must interpolate its own memory barriers between the +separate data accesses. Thus the above sleeper ought to do: + + set_current_state(TASK_INTERRUPTIBLE); + if (event_indicated) { + smp_rmb(); + do_something(my_data); + } + +and the waker should do: + + my_data = value; + smp_wmb(); + event_indicated = 1; + wake_up(&event_wait_queue); + + MISCELLANEOUS FUNCTIONS ----------------------- @@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED? Under normal operation, memory operation reordering is generally not going to be a problem as a single-threaded linear piece of code will still appear to -work correctly, even if it's in an SMP kernel. There are, however, three +work correctly, even if it's in an SMP kernel. There are, however, four circumstances in which reordering definitely _could_ be a problem: (*) Interprocessor interaction. diff --git a/kernel/sched.c b/kernel/sched.c index b902e58..fd0c2ce 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2458,6 +2458,17 @@ out: return success; } +/** + * wake_up_process - Wake up a specific process + * @p: The process to be woken up. + * + * Attempt to wake up the nominated process and move it to the set of runnable + * processes. Returns 1 if the process was woken up, 0 if it was already + * running. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. + */ int wake_up_process(struct task_struct *p) { return try_to_wake_up(p, TASK_ALL, 0); @@ -5241,6 +5252,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode, * @mode: which threads * @nr_exclusive: how many wake-one or wake-many threads to wake up * @key: is directly passed to the wakeup function + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, void *key) @@ -5279,6 +5293,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) * with each other. This can prevent needless bouncing between CPUs. * * On UP it can prevent extra preemption. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, void *key) @@ -5315,6 +5332,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ * awakened in the same order in which they were queued. * * See also complete_all(), wait_for_completion() and related routines. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void complete(struct completion *x) { @@ -5332,6 +5352,9 @@ EXPORT_SYMBOL(complete); * @x: holds the state of this particular completion * * This will wake up all threads waiting on this particular completion event. + * + * It may be assumed that this function implies a write memory barrier before + * changing the task state if and only if any tasks are woken up. */ void complete_all(struct completion *x) { -- cgit v0.10.2 From da1a776be1ac7f78bb30ececbec4c1383163b079 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:46:58 +0200 Subject: perf_counter, x86: remove X86_FEATURE_ARCH_PERFMON flag for AMD cpus X86_FEATURE_ARCH_PERFMON is an Intel hardware feature that does not work on AMD CPUs. The flag is now only used in Intel specific code (especially initialization). [ Impact: refactor code ] Signed-off-by: Robert Richter Acked-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1241002046-8832-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index fd69c51..7e4a459 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -420,10 +420,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 >= 6) set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); - /* Enable Performance counter for K7 and later */ - if (c->x86 > 6 && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); - if (!c->x86_model_id[0]) { switch (c->x86) { case 0xf: diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 0fcbaab..7d0f81d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -949,6 +949,9 @@ static struct pmc_x86_ops *pmc_intel_init(void) unsigned int unused; unsigned int ebx; + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + return NULL; + /* * Check whether the Architectural PerfMon supports * Branch Misses Retired Event or not. @@ -987,9 +990,6 @@ static struct pmc_x86_ops *pmc_amd_init(void) void __init init_hw_perf_counters(void) { - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return; - switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: pmc_ops = pmc_intel_init(); -- cgit v0.10.2 From 829b42dd395c5801f6ae87da87ecbdcfd5ef1a6c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:46:59 +0200 Subject: perf_counter, x86: declare perf_max_counters only for CONFIG_PERF_COUNTERS This is only needed for CONFIG_PERF_COUNTERS enabled. [ Impact: cleanup ] Signed-off-by: Robert Richter Acked-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1241002046-8832-3-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 9814328..be10b3f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -512,12 +512,13 @@ struct perf_cpu_context { int recursion[4]; }; +#ifdef CONFIG_PERF_COUNTERS + /* * Set by architecture code: */ extern int perf_max_counters; -#ifdef CONFIG_PERF_COUNTERS extern const struct hw_perf_counter_ops * hw_perf_counter_init(struct perf_counter *counter); -- cgit v0.10.2 From 4138960a9251a265002b5cf07e671a49f8495381 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:00 +0200 Subject: perf_counter, x86: add default path to cpu detection This quits hw counter initialization immediately if no cpu is detected. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-4-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 7d0f81d..d6d6529 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -997,6 +997,8 @@ void __init init_hw_perf_counters(void) case X86_VENDOR_AMD: pmc_ops = pmc_amd_init(); break; + default: + return; } if (!pmc_ops) return; -- cgit v0.10.2 From 4295ee62660b13ddb87d41539f49b239e6e7d56f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:01 +0200 Subject: perf_counter, x86: rework pmc_amd_save_disable_all() and pmc_amd_restore_all() MSR reads and writes are expensive. This patch adds checks to avoid its usage where possible. [ Impact: micro-optimization on AMD CPUs ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-5-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d6d6529..75a0903 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -334,11 +334,13 @@ static u64 pmc_amd_save_disable_all(void) for (idx = 0; idx < nr_counters_generic; idx++) { u64 val; + if (!test_bit(idx, cpuc->active_mask)) + continue; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) { - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } + if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) + continue; + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } return enabled; @@ -372,13 +374,15 @@ static void pmc_amd_restore_all(u64 ctrl) return; for (idx = 0; idx < nr_counters_generic; idx++) { - if (test_bit(idx, cpuc->active_mask)) { - u64 val; + u64 val; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } + if (!test_bit(idx, cpuc->active_mask)) + continue; + rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) + continue; + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_K7_EVNTSEL0 + idx, val); } } -- cgit v0.10.2 From 527e26af3741a2168986d8b82653ffe173891324 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:02 +0200 Subject: perf_counter, x86: protect per-cpu variables with compile barriers only Per-cpu variables needn't to be protected with cpu barriers (smp_wmb()). Protection is only needed for preemption on the same cpu (rescheduling or the nmi handler). This can be done using a compiler barrier only. [ Impact: micro-optimization ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-6-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 75a0903..ad663d5 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -673,7 +673,7 @@ try_generic: /* * Make it visible before enabling the hw: */ - smp_wmb(); + barrier(); __hw_perf_counter_set_period(counter, hwc, idx); __pmc_generic_enable(counter, hwc, idx); @@ -745,7 +745,7 @@ static void pmc_generic_disable(struct perf_counter *counter) * Make sure the cleared pointer becomes visible before we * (potentially) free the counter: */ - smp_wmb(); + barrier(); /* * Drain the remaining delta count out of a counter -- cgit v0.10.2 From 4aeb0b4239bb3b67ed402cb9cef3e000c892cadf Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:03 +0200 Subject: perfcounters: rename struct hw_perf_counter_ops into struct pmu This patch renames struct hw_perf_counter_ops into struct pmu. It introduces a structure to describe a cpu specific pmu (performance monitoring unit). It may contain ops and data. The new name of the structure fits better, is shorter, and thus better to handle. Where it was appropriate, names of function and variable have been changed too. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-7-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index bd76d0f..d9bbe5e 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -256,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) return 0; } -static void power_perf_read(struct perf_counter *counter) +static void power_pmu_read(struct perf_counter *counter) { long val, delta, prev; @@ -405,7 +405,7 @@ void hw_perf_restore(u64 disable) for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_perf_read(counter); + power_pmu_read(counter); write_pmc(counter->hw.idx, 0); counter->hw.idx = 0; } @@ -477,7 +477,7 @@ static void counter_sched_in(struct perf_counter *counter, int cpu) counter->oncpu = cpu; counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; if (is_software_counter(counter)) - counter->hw_ops->enable(counter); + counter->pmu->enable(counter); } /* @@ -533,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, * re-enable the PMU in order to get hw_perf_restore to do the * actual work of reconfiguring the PMU. */ -static int power_perf_enable(struct perf_counter *counter) +static int power_pmu_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; unsigned long flags; @@ -573,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter) /* * Remove a counter from the PMU. */ -static void power_perf_disable(struct perf_counter *counter) +static void power_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuhw; long i; @@ -583,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter) local_irq_save(flags); pmudis = hw_perf_save_disable(); - power_perf_read(counter); + power_pmu_read(counter); cpuhw = &__get_cpu_var(cpu_hw_counters); for (i = 0; i < cpuhw->n_counters; ++i) { @@ -607,10 +607,10 @@ static void power_perf_disable(struct perf_counter *counter) local_irq_restore(flags); } -struct hw_perf_counter_ops power_perf_ops = { - .enable = power_perf_enable, - .disable = power_perf_disable, - .read = power_perf_read +struct pmu power_pmu = { + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, }; /* Number of perf_counters counting hardware events */ @@ -631,8 +631,7 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) } } -const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter) +const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { unsigned long ev; struct perf_counter *ctrs[MAX_HWCOUNTERS]; @@ -705,7 +704,7 @@ hw_perf_counter_init(struct perf_counter *counter) if (err) return ERR_PTR(err); - return &power_perf_ops; + return &power_pmu; } /* diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index ad663d5..95de980 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -515,8 +515,8 @@ __pmc_fixed_disable(struct perf_counter *counter, } static inline void -__pmc_generic_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int idx) +__x86_pmu_disable(struct perf_counter *counter, + struct hw_perf_counter *hwc, unsigned int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_disable(counter, hwc, idx); @@ -591,8 +591,8 @@ __pmc_fixed_enable(struct perf_counter *counter, } static void -__pmc_generic_enable(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) +__x86_pmu_enable(struct perf_counter *counter, + struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_enable(counter, hwc, idx); @@ -626,7 +626,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) /* * Find a PMC slot for the freshly enabled / scheduled in counter: */ -static int pmc_generic_enable(struct perf_counter *counter) +static int x86_pmu_enable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; @@ -667,7 +667,7 @@ try_generic: perf_counters_lapic_init(hwc->nmi); - __pmc_generic_disable(counter, hwc, idx); + __x86_pmu_disable(counter, hwc, idx); cpuc->counters[idx] = counter; /* @@ -676,7 +676,7 @@ try_generic: barrier(); __hw_perf_counter_set_period(counter, hwc, idx); - __pmc_generic_enable(counter, hwc, idx); + __x86_pmu_enable(counter, hwc, idx); return 0; } @@ -731,13 +731,13 @@ void perf_counter_print_debug(void) local_irq_enable(); } -static void pmc_generic_disable(struct perf_counter *counter) +static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; - __pmc_generic_disable(counter, hwc, idx); + __x86_pmu_disable(counter, hwc, idx); clear_bit(idx, cpuc->used); cpuc->counters[idx] = NULL; @@ -767,7 +767,7 @@ static void perf_save_and_restart(struct perf_counter *counter) __hw_perf_counter_set_period(counter, hwc, idx); if (counter->state == PERF_COUNTER_STATE_ACTIVE) - __pmc_generic_enable(counter, hwc, idx); + __x86_pmu_enable(counter, hwc, idx); } /* @@ -805,7 +805,7 @@ again: perf_save_and_restart(counter); if (perf_counter_overflow(counter, nmi, regs, 0)) - __pmc_generic_disable(counter, &counter->hw, bit); + __x86_pmu_disable(counter, &counter->hw, bit); } hw_perf_ack_status(ack); @@ -1034,19 +1034,18 @@ void __init init_hw_perf_counters(void) register_die_notifier(&perf_counter_nmi_notifier); } -static void pmc_generic_read(struct perf_counter *counter) +static void x86_pmu_read(struct perf_counter *counter) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); } -static const struct hw_perf_counter_ops x86_perf_counter_ops = { - .enable = pmc_generic_enable, - .disable = pmc_generic_disable, - .read = pmc_generic_read, +static const struct pmu pmu = { + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .read = x86_pmu_read, }; -const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter) +const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { int err; @@ -1054,7 +1053,7 @@ hw_perf_counter_init(struct perf_counter *counter) if (err) return ERR_PTR(err); - return &x86_perf_counter_ops; + return &pmu; } /* diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index be10b3f..c3db52d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -334,9 +334,9 @@ struct hw_perf_counter { struct perf_counter; /** - * struct hw_perf_counter_ops - performance counter hw ops + * struct pmu - generic performance monitoring unit */ -struct hw_perf_counter_ops { +struct pmu { int (*enable) (struct perf_counter *counter); void (*disable) (struct perf_counter *counter); void (*read) (struct perf_counter *counter); @@ -381,7 +381,7 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; - const struct hw_perf_counter_ops *hw_ops; + const struct pmu *pmu; enum perf_counter_active_state state; enum perf_counter_active_state prev_state; @@ -519,8 +519,7 @@ struct perf_cpu_context { */ extern int perf_max_counters; -extern const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter); +extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 0939609..582108a 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -52,8 +52,7 @@ static DEFINE_MUTEX(perf_resource_mutex); /* * Architecture provided APIs - weak aliases: */ -extern __weak const struct hw_perf_counter_ops * -hw_perf_counter_init(struct perf_counter *counter) +extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { return NULL; } @@ -124,7 +123,7 @@ counter_sched_out(struct perf_counter *counter, counter->state = PERF_COUNTER_STATE_INACTIVE; counter->tstamp_stopped = ctx->time; - counter->hw_ops->disable(counter); + counter->pmu->disable(counter); counter->oncpu = -1; if (!is_software_counter(counter)) @@ -417,7 +416,7 @@ counter_sched_in(struct perf_counter *counter, */ smp_wmb(); - if (counter->hw_ops->enable(counter)) { + if (counter->pmu->enable(counter)) { counter->state = PERF_COUNTER_STATE_INACTIVE; counter->oncpu = -1; return -EAGAIN; @@ -1096,7 +1095,7 @@ static void __read(void *info) local_irq_save(flags); if (ctx->is_active) update_context_time(ctx); - counter->hw_ops->read(counter); + counter->pmu->read(counter); update_counter_times(counter); local_irq_restore(flags); } @@ -1922,7 +1921,7 @@ static void perf_counter_output(struct perf_counter *counter, leader = counter->group_leader; list_for_each_entry(sub, &leader->sibling_list, list_entry) { if (sub != counter) - sub->hw_ops->read(sub); + sub->pmu->read(sub); group_entry.event = sub->hw_event.config; group_entry.counter = atomic64_read(&sub->count); @@ -2264,7 +2263,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) struct pt_regs *regs; counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->hw_ops->read(counter); + counter->pmu->read(counter); regs = get_irq_regs(); /* @@ -2410,7 +2409,7 @@ static void perf_swcounter_disable(struct perf_counter *counter) perf_swcounter_update(counter); } -static const struct hw_perf_counter_ops perf_ops_generic = { +static const struct pmu perf_ops_generic = { .enable = perf_swcounter_enable, .disable = perf_swcounter_disable, .read = perf_swcounter_read, @@ -2460,7 +2459,7 @@ static void cpu_clock_perf_counter_read(struct perf_counter *counter) cpu_clock_perf_counter_update(counter); } -static const struct hw_perf_counter_ops perf_ops_cpu_clock = { +static const struct pmu perf_ops_cpu_clock = { .enable = cpu_clock_perf_counter_enable, .disable = cpu_clock_perf_counter_disable, .read = cpu_clock_perf_counter_read, @@ -2522,7 +2521,7 @@ static void task_clock_perf_counter_read(struct perf_counter *counter) task_clock_perf_counter_update(counter, time); } -static const struct hw_perf_counter_ops perf_ops_task_clock = { +static const struct pmu perf_ops_task_clock = { .enable = task_clock_perf_counter_enable, .disable = task_clock_perf_counter_disable, .read = task_clock_perf_counter_read, @@ -2574,7 +2573,7 @@ static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) cpu_migrations_perf_counter_update(counter); } -static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { +static const struct pmu perf_ops_cpu_migrations = { .enable = cpu_migrations_perf_counter_enable, .disable = cpu_migrations_perf_counter_disable, .read = cpu_migrations_perf_counter_read, @@ -2600,8 +2599,7 @@ static void tp_perf_counter_destroy(struct perf_counter *counter) ftrace_profile_disable(perf_event_id(&counter->hw_event)); } -static const struct hw_perf_counter_ops * -tp_perf_counter_init(struct perf_counter *counter) +static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) { int event_id = perf_event_id(&counter->hw_event); int ret; @@ -2616,18 +2614,16 @@ tp_perf_counter_init(struct perf_counter *counter) return &perf_ops_generic; } #else -static const struct hw_perf_counter_ops * -tp_perf_counter_init(struct perf_counter *counter) +static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) { return NULL; } #endif -static const struct hw_perf_counter_ops * -sw_perf_counter_init(struct perf_counter *counter) +static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) { struct perf_counter_hw_event *hw_event = &counter->hw_event; - const struct hw_perf_counter_ops *hw_ops = NULL; + const struct pmu *pmu = NULL; struct hw_perf_counter *hwc = &counter->hw; /* @@ -2639,7 +2635,7 @@ sw_perf_counter_init(struct perf_counter *counter) */ switch (perf_event_id(&counter->hw_event)) { case PERF_COUNT_CPU_CLOCK: - hw_ops = &perf_ops_cpu_clock; + pmu = &perf_ops_cpu_clock; if (hw_event->irq_period && hw_event->irq_period < 10000) hw_event->irq_period = 10000; @@ -2650,9 +2646,9 @@ sw_perf_counter_init(struct perf_counter *counter) * use the cpu_clock counter instead. */ if (counter->ctx->task) - hw_ops = &perf_ops_task_clock; + pmu = &perf_ops_task_clock; else - hw_ops = &perf_ops_cpu_clock; + pmu = &perf_ops_cpu_clock; if (hw_event->irq_period && hw_event->irq_period < 10000) hw_event->irq_period = 10000; @@ -2661,18 +2657,18 @@ sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_PAGE_FAULTS_MIN: case PERF_COUNT_PAGE_FAULTS_MAJ: case PERF_COUNT_CONTEXT_SWITCHES: - hw_ops = &perf_ops_generic; + pmu = &perf_ops_generic; break; case PERF_COUNT_CPU_MIGRATIONS: if (!counter->hw_event.exclude_kernel) - hw_ops = &perf_ops_cpu_migrations; + pmu = &perf_ops_cpu_migrations; break; } - if (hw_ops) + if (pmu) hwc->irq_period = hw_event->irq_period; - return hw_ops; + return pmu; } /* @@ -2685,7 +2681,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, struct perf_counter *group_leader, gfp_t gfpflags) { - const struct hw_perf_counter_ops *hw_ops; + const struct pmu *pmu; struct perf_counter *counter; long err; @@ -2713,46 +2709,46 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event, counter->cpu = cpu; counter->hw_event = *hw_event; counter->group_leader = group_leader; - counter->hw_ops = NULL; + counter->pmu = NULL; counter->ctx = ctx; counter->state = PERF_COUNTER_STATE_INACTIVE; if (hw_event->disabled) counter->state = PERF_COUNTER_STATE_OFF; - hw_ops = NULL; + pmu = NULL; if (perf_event_raw(hw_event)) { - hw_ops = hw_perf_counter_init(counter); + pmu = hw_perf_counter_init(counter); goto done; } switch (perf_event_type(hw_event)) { case PERF_TYPE_HARDWARE: - hw_ops = hw_perf_counter_init(counter); + pmu = hw_perf_counter_init(counter); break; case PERF_TYPE_SOFTWARE: - hw_ops = sw_perf_counter_init(counter); + pmu = sw_perf_counter_init(counter); break; case PERF_TYPE_TRACEPOINT: - hw_ops = tp_perf_counter_init(counter); + pmu = tp_perf_counter_init(counter); break; } done: err = 0; - if (!hw_ops) + if (!pmu) err = -EINVAL; - else if (IS_ERR(hw_ops)) - err = PTR_ERR(hw_ops); + else if (IS_ERR(pmu)) + err = PTR_ERR(pmu); if (err) { kfree(counter); return ERR_PTR(err); } - counter->hw_ops = hw_ops; + counter->pmu = pmu; if (counter->hw_event.mmap) atomic_inc(&nr_mmap_tracking); -- cgit v0.10.2 From 5f4ec28ffe77c840354cce1820a3436106e9e0f1 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:04 +0200 Subject: perf_counter, x86: rename struct pmc_x86_ops into struct x86_pmu This patch renames struct pmc_x86_ops into struct x86_pmu. It introduces a structure to describe an x86 model specific pmu (performance monitoring unit). It may contain ops and data. The new name of the structure fits better, is shorter, and thus better to handle. Where it was appropriate, names of function and variable have been changed too. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-8-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 95de980..808a1a1 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -44,9 +44,9 @@ struct cpu_hw_counters { }; /* - * struct pmc_x86_ops - performance counter x86 ops + * struct x86_pmu - generic x86 pmu */ -struct pmc_x86_ops { +struct x86_pmu { u64 (*save_disable_all)(void); void (*restore_all)(u64); u64 (*get_status)(u64); @@ -60,7 +60,7 @@ struct pmc_x86_ops { int max_events; }; -static struct pmc_x86_ops *pmc_ops __read_mostly; +static struct x86_pmu *x86_pmu __read_mostly; static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, @@ -82,12 +82,12 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_BUS_CYCLES] = 0x013c, }; -static u64 pmc_intel_event_map(int event) +static u64 intel_pmu_event_map(int event) { return intel_perfmon_event_map[event]; } -static u64 pmc_intel_raw_event(u64 event) +static u64 intel_pmu_raw_event(u64 event) { #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL @@ -114,12 +114,12 @@ static const u64 amd_perfmon_event_map[] = [PERF_COUNT_BRANCH_MISSES] = 0x00c5, }; -static u64 pmc_amd_event_map(int event) +static u64 amd_pmu_event_map(int event) { return amd_perfmon_event_map[event]; } -static u64 pmc_amd_raw_event(u64 event) +static u64 amd_pmu_raw_event(u64 event) { #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL @@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void) disable_lapic_nmi_watchdog(); for (i = 0; i < nr_counters_generic; i++) { - if (!reserve_perfctr_nmi(pmc_ops->perfctr + i)) + if (!reserve_perfctr_nmi(x86_pmu->perfctr + i)) goto perfctr_fail; } for (i = 0; i < nr_counters_generic; i++) { - if (!reserve_evntsel_nmi(pmc_ops->eventsel + i)) + if (!reserve_evntsel_nmi(x86_pmu->eventsel + i)) goto eventsel_fail; } @@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void) eventsel_fail: for (i--; i >= 0; i--) - release_evntsel_nmi(pmc_ops->eventsel + i); + release_evntsel_nmi(x86_pmu->eventsel + i); i = nr_counters_generic; perfctr_fail: for (i--; i >= 0; i--) - release_perfctr_nmi(pmc_ops->perfctr + i); + release_perfctr_nmi(x86_pmu->perfctr + i); if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); @@ -216,8 +216,8 @@ static void release_pmc_hardware(void) int i; for (i = 0; i < nr_counters_generic; i++) { - release_perfctr_nmi(pmc_ops->perfctr + i); - release_evntsel_nmi(pmc_ops->eventsel + i); + release_perfctr_nmi(x86_pmu->perfctr + i); + release_evntsel_nmi(x86_pmu->eventsel + i); } if (nmi_watchdog == NMI_LOCAL_APIC) @@ -293,14 +293,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * Raw event type provide the config in the event structure */ if (perf_event_raw(hw_event)) { - hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event)); + hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event)); } else { - if (perf_event_id(hw_event) >= pmc_ops->max_events) + if (perf_event_id(hw_event) >= x86_pmu->max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); + hwc->config |= x86_pmu->event_map(perf_event_id(hw_event)); } counter->destroy = hw_perf_counter_destroy; @@ -308,7 +308,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) return 0; } -static u64 pmc_intel_save_disable_all(void) +static u64 intel_pmu_save_disable_all(void) { u64 ctrl; @@ -318,7 +318,7 @@ static u64 pmc_intel_save_disable_all(void) return ctrl; } -static u64 pmc_amd_save_disable_all(void) +static u64 amd_pmu_save_disable_all(void) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); int enabled, idx; @@ -327,7 +327,8 @@ static u64 pmc_amd_save_disable_all(void) cpuc->enabled = 0; /* * ensure we write the disable before we start disabling the - * counters proper, so that pcm_amd_enable() does the right thing. + * counters proper, so that amd_pmu_enable_counter() does the + * right thing. */ barrier(); @@ -351,19 +352,19 @@ u64 hw_perf_save_disable(void) if (unlikely(!perf_counters_initialized)) return 0; - return pmc_ops->save_disable_all(); + return x86_pmu->save_disable_all(); } /* * Exported because of ACPI idle */ EXPORT_SYMBOL_GPL(hw_perf_save_disable); -static void pmc_intel_restore_all(u64 ctrl) +static void intel_pmu_restore_all(u64 ctrl) { wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); } -static void pmc_amd_restore_all(u64 ctrl) +static void amd_pmu_restore_all(u64 ctrl) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); int idx; @@ -391,14 +392,14 @@ void hw_perf_restore(u64 ctrl) if (unlikely(!perf_counters_initialized)) return; - pmc_ops->restore_all(ctrl); + x86_pmu->restore_all(ctrl); } /* * Exported because of ACPI idle */ EXPORT_SYMBOL_GPL(hw_perf_restore); -static u64 pmc_intel_get_status(u64 mask) +static u64 intel_pmu_get_status(u64 mask) { u64 status; @@ -407,7 +408,7 @@ static u64 pmc_intel_get_status(u64 mask) return status; } -static u64 pmc_amd_get_status(u64 mask) +static u64 amd_pmu_get_status(u64 mask) { u64 status = 0; int idx; @@ -432,15 +433,15 @@ static u64 hw_perf_get_status(u64 mask) if (unlikely(!perf_counters_initialized)) return 0; - return pmc_ops->get_status(mask); + return x86_pmu->get_status(mask); } -static void pmc_intel_ack_status(u64 ack) +static void intel_pmu_ack_status(u64 ack) { wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static void pmc_amd_ack_status(u64 ack) +static void amd_pmu_ack_status(u64 ack) { } @@ -449,16 +450,16 @@ static void hw_perf_ack_status(u64 ack) if (unlikely(!perf_counters_initialized)) return; - pmc_ops->ack_status(ack); + x86_pmu->ack_status(ack); } -static void pmc_intel_enable(int idx, u64 config) +static void intel_pmu_enable_counter(int idx, u64 config) { wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config | ARCH_PERFMON_EVENTSEL0_ENABLE); } -static void pmc_amd_enable(int idx, u64 config) +static void amd_pmu_enable_counter(int idx, u64 config) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -474,15 +475,15 @@ static void hw_perf_enable(int idx, u64 config) if (unlikely(!perf_counters_initialized)) return; - pmc_ops->enable(idx, config); + x86_pmu->enable(idx, config); } -static void pmc_intel_disable(int idx, u64 config) +static void intel_pmu_disable_counter(int idx, u64 config) { wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config); } -static void pmc_amd_disable(int idx, u64 config) +static void amd_pmu_disable_counter(int idx, u64 config) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -496,7 +497,7 @@ static void hw_perf_disable(int idx, u64 config) if (unlikely(!perf_counters_initialized)) return; - pmc_ops->disable(idx, config); + x86_pmu->disable(idx, config); } static inline void @@ -613,11 +614,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS))) + if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES))) + if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES))) return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES))) + if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES))) return X86_PMC_IDX_FIXED_BUS_CYCLES; return -1; @@ -661,8 +662,8 @@ try_generic: set_bit(idx, cpuc->used); hwc->idx = idx; } - hwc->config_base = pmc_ops->eventsel; - hwc->counter_base = pmc_ops->perfctr; + hwc->config_base = x86_pmu->eventsel; + hwc->counter_base = x86_pmu->perfctr; } perf_counters_lapic_init(hwc->nmi); @@ -710,8 +711,8 @@ void perf_counter_print_debug(void) pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { - rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl); - rdmsrl(pmc_ops->perfctr + idx, pmc_count); + rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl); + rdmsrl(x86_pmu->perfctr + idx, pmc_count); prev_left = per_cpu(prev_left[idx], cpu); @@ -918,35 +919,35 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { .priority = 1 }; -static struct pmc_x86_ops pmc_intel_ops = { - .save_disable_all = pmc_intel_save_disable_all, - .restore_all = pmc_intel_restore_all, - .get_status = pmc_intel_get_status, - .ack_status = pmc_intel_ack_status, - .enable = pmc_intel_enable, - .disable = pmc_intel_disable, +static struct x86_pmu intel_pmu = { + .save_disable_all = intel_pmu_save_disable_all, + .restore_all = intel_pmu_restore_all, + .get_status = intel_pmu_get_status, + .ack_status = intel_pmu_ack_status, + .enable = intel_pmu_enable_counter, + .disable = intel_pmu_disable_counter, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = pmc_intel_event_map, - .raw_event = pmc_intel_raw_event, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), }; -static struct pmc_x86_ops pmc_amd_ops = { - .save_disable_all = pmc_amd_save_disable_all, - .restore_all = pmc_amd_restore_all, - .get_status = pmc_amd_get_status, - .ack_status = pmc_amd_ack_status, - .enable = pmc_amd_enable, - .disable = pmc_amd_disable, +static struct x86_pmu amd_pmu = { + .save_disable_all = amd_pmu_save_disable_all, + .restore_all = amd_pmu_restore_all, + .get_status = amd_pmu_get_status, + .ack_status = amd_pmu_ack_status, + .enable = amd_pmu_enable_counter, + .disable = amd_pmu_disable_counter, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, - .event_map = pmc_amd_event_map, - .raw_event = pmc_amd_raw_event, + .event_map = amd_pmu_event_map, + .raw_event = amd_pmu_raw_event, .max_events = ARRAY_SIZE(amd_perfmon_event_map), }; -static struct pmc_x86_ops *pmc_intel_init(void) +static struct x86_pmu *intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -977,10 +978,10 @@ static struct pmc_x86_ops *pmc_intel_init(void) nr_counters_fixed = edx.split.num_counters_fixed; counter_value_mask = (1ULL << eax.split.bit_width) - 1; - return &pmc_intel_ops; + return &intel_pmu; } -static struct pmc_x86_ops *pmc_amd_init(void) +static struct x86_pmu *amd_pmu_init(void) { nr_counters_generic = 4; nr_counters_fixed = 0; @@ -989,22 +990,22 @@ static struct pmc_x86_ops *pmc_amd_init(void) pr_info("AMD Performance Monitoring support detected.\n"); - return &pmc_amd_ops; + return &amd_pmu; } void __init init_hw_perf_counters(void) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: - pmc_ops = pmc_intel_init(); + x86_pmu = intel_pmu_init(); break; case X86_VENDOR_AMD: - pmc_ops = pmc_amd_init(); + x86_pmu = amd_pmu_init(); break; default: return; } - if (!pmc_ops) + if (!x86_pmu) return; pr_info("... num counters: %d\n", nr_counters_generic); -- cgit v0.10.2 From 39d81eab2374d71b2d9c82f66258a1a4f57ddd2e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:05 +0200 Subject: perf_counter, x86: make interrupt handler model specific This separates the perfcounter interrupt handler for AMD and Intel cpus. The AMD interrupt handler implementation is a follow-on patch. [ Impact: refactor and clean up code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-9-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 808a1a1..9d90de0 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -4,6 +4,7 @@ * Copyright(C) 2008 Thomas Gleixner * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar * Copyright(C) 2009 Jaswinder Singh Rajput + * Copyright(C) 2009 Advanced Micro Devices, Inc., Robert Richter * * For licencing details see kernel-base/COPYING */ @@ -47,6 +48,7 @@ struct cpu_hw_counters { * struct x86_pmu - generic x86 pmu */ struct x86_pmu { + int (*handle_irq)(struct pt_regs *, int); u64 (*save_disable_all)(void); void (*restore_all)(u64); u64 (*get_status)(u64); @@ -241,6 +243,10 @@ static int __hw_perf_counter_init(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; int err; + /* disable temporarily */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return -ENOSYS; + if (unlikely(!perf_counters_initialized)) return -EINVAL; @@ -780,7 +786,7 @@ static void perf_save_and_restart(struct perf_counter *counter) * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: */ -static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) +static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi) { int bit, cpu = smp_processor_id(); u64 ack, status; @@ -827,6 +833,8 @@ out: return ret; } +static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; } + void perf_counter_unthrottle(void) { struct cpu_hw_counters *cpuc; @@ -851,7 +859,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_enter(); apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); ack_APIC_irq(); - __smp_perf_counter_interrupt(regs, 0); + x86_pmu->handle_irq(regs, 0); irq_exit(); } @@ -908,7 +916,7 @@ perf_counter_nmi_handler(struct notifier_block *self, regs = args->regs; apic_write(APIC_LVTPC, APIC_DM_NMI); - ret = __smp_perf_counter_interrupt(regs, 1); + ret = x86_pmu->handle_irq(regs, 1); return ret ? NOTIFY_STOP : NOTIFY_OK; } @@ -920,6 +928,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { }; static struct x86_pmu intel_pmu = { + .handle_irq = intel_pmu_handle_irq, .save_disable_all = intel_pmu_save_disable_all, .restore_all = intel_pmu_restore_all, .get_status = intel_pmu_get_status, @@ -934,6 +943,7 @@ static struct x86_pmu intel_pmu = { }; static struct x86_pmu amd_pmu = { + .handle_irq = amd_pmu_handle_irq, .save_disable_all = amd_pmu_save_disable_all, .restore_all = amd_pmu_restore_all, .get_status = amd_pmu_get_status, -- cgit v0.10.2 From b7f8859a8ed1937e2139c17b84878f1d413fa659 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:06 +0200 Subject: perf_counter, x86: remove get_status() from struct x86_pmu This function is Intel only and not necessary for AMD cpus. [ Impact: simplify code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-10-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 9d90de0..d0bb029 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -51,7 +51,6 @@ struct x86_pmu { int (*handle_irq)(struct pt_regs *, int); u64 (*save_disable_all)(void); void (*restore_all)(u64); - u64 (*get_status)(u64); void (*ack_status)(u64); void (*enable)(int, u64); void (*disable)(int, u64); @@ -405,41 +404,15 @@ void hw_perf_restore(u64 ctrl) */ EXPORT_SYMBOL_GPL(hw_perf_restore); -static u64 intel_pmu_get_status(u64 mask) +static inline u64 intel_pmu_get_status(u64 mask) { u64 status; - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - - return status; -} - -static u64 amd_pmu_get_status(u64 mask) -{ - u64 status = 0; - int idx; - - for (idx = 0; idx < nr_counters_generic; idx++) { - s64 val; - - if (!(mask & (1 << idx))) - continue; - - rdmsrl(MSR_K7_PERFCTR0 + idx, val); - val <<= (64 - counter_value_bits); - if (val >= 0) - status |= (1 << idx); - } - - return status; -} - -static u64 hw_perf_get_status(u64 mask) -{ if (unlikely(!perf_counters_initialized)) return 0; + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - return x86_pmu->get_status(mask); + return status; } static void intel_pmu_ack_status(u64 ack) @@ -795,7 +768,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi) cpuc->throttle_ctrl = hw_perf_save_disable(); - status = hw_perf_get_status(cpuc->throttle_ctrl); + status = intel_pmu_get_status(cpuc->throttle_ctrl); if (!status) goto out; @@ -820,7 +793,7 @@ again: /* * Repeat if there is more work to be done: */ - status = hw_perf_get_status(cpuc->throttle_ctrl); + status = intel_pmu_get_status(cpuc->throttle_ctrl); if (status) goto again; out: @@ -931,7 +904,6 @@ static struct x86_pmu intel_pmu = { .handle_irq = intel_pmu_handle_irq, .save_disable_all = intel_pmu_save_disable_all, .restore_all = intel_pmu_restore_all, - .get_status = intel_pmu_get_status, .ack_status = intel_pmu_ack_status, .enable = intel_pmu_enable_counter, .disable = intel_pmu_disable_counter, @@ -946,7 +918,6 @@ static struct x86_pmu amd_pmu = { .handle_irq = amd_pmu_handle_irq, .save_disable_all = amd_pmu_save_disable_all, .restore_all = amd_pmu_restore_all, - .get_status = amd_pmu_get_status, .ack_status = amd_pmu_ack_status, .enable = amd_pmu_enable_counter, .disable = amd_pmu_disable_counter, -- cgit v0.10.2 From dee5d9067ca78b317538fd67930be4e09a83dbc5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:07 +0200 Subject: perf_counter, x86: remove ack_status() from struct x86_pmu This function is Intel only and not necessary for AMD cpus. [ Impact: simplify code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-11-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d0bb029..6bbdc16 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -51,7 +51,6 @@ struct x86_pmu { int (*handle_irq)(struct pt_regs *, int); u64 (*save_disable_all)(void); void (*restore_all)(u64); - void (*ack_status)(u64); void (*enable)(int, u64); void (*disable)(int, u64); unsigned eventsel; @@ -415,23 +414,11 @@ static inline u64 intel_pmu_get_status(u64 mask) return status; } -static void intel_pmu_ack_status(u64 ack) +static inline void intel_pmu_ack_status(u64 ack) { wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static void amd_pmu_ack_status(u64 ack) -{ -} - -static void hw_perf_ack_status(u64 ack) -{ - if (unlikely(!perf_counters_initialized)) - return; - - x86_pmu->ack_status(ack); -} - static void intel_pmu_enable_counter(int idx, u64 config) { wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, @@ -788,7 +775,7 @@ again: __x86_pmu_disable(counter, &counter->hw, bit); } - hw_perf_ack_status(ack); + intel_pmu_ack_status(ack); /* * Repeat if there is more work to be done: @@ -904,7 +891,6 @@ static struct x86_pmu intel_pmu = { .handle_irq = intel_pmu_handle_irq, .save_disable_all = intel_pmu_save_disable_all, .restore_all = intel_pmu_restore_all, - .ack_status = intel_pmu_ack_status, .enable = intel_pmu_enable_counter, .disable = intel_pmu_disable_counter, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, @@ -918,7 +904,6 @@ static struct x86_pmu amd_pmu = { .handle_irq = amd_pmu_handle_irq, .save_disable_all = amd_pmu_save_disable_all, .restore_all = amd_pmu_restore_all, - .ack_status = amd_pmu_ack_status, .enable = amd_pmu_enable_counter, .disable = amd_pmu_disable_counter, .eventsel = MSR_K7_EVNTSEL0, -- cgit v0.10.2 From 26816c287e13eedc67bc4ed0cd40c138314b7c7d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:08 +0200 Subject: perf_counter, x86: rename __hw_perf_counter_set_period into x86_perf_counter_set_period [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-12-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 6bbdc16..fa6541d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -498,7 +498,7 @@ static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); * To be called with the counter disabled in hw: */ static void -__hw_perf_counter_set_period(struct perf_counter *counter, +x86_perf_counter_set_period(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { s64 left = atomic64_read(&hwc->period_left); @@ -642,7 +642,7 @@ try_generic: */ barrier(); - __hw_perf_counter_set_period(counter, hwc, idx); + x86_perf_counter_set_period(counter, hwc, idx); __x86_pmu_enable(counter, hwc, idx); return 0; @@ -731,7 +731,7 @@ static void perf_save_and_restart(struct perf_counter *counter) int idx = hwc->idx; x86_perf_counter_update(counter, hwc, idx); - __hw_perf_counter_set_period(counter, hwc, idx); + x86_perf_counter_set_period(counter, hwc, idx); if (counter->state == PERF_COUNTER_STATE_ACTIVE) __x86_pmu_enable(counter, hwc, idx); -- cgit v0.10.2 From 55de0f2e57994b525324bf0d04d242d9358a2417 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:09 +0200 Subject: perf_counter, x86: rename intel only functions [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-13-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index fa6541d..5a52d73 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -725,7 +725,7 @@ static void x86_pmu_disable(struct perf_counter *counter) * Save and restart an expired counter. Called by NMI contexts, * so it has to be careful about preempting normal counter ops: */ -static void perf_save_and_restart(struct perf_counter *counter) +static void intel_pmu_save_and_restart(struct perf_counter *counter) { struct hw_perf_counter *hwc = &counter->hw; int idx = hwc->idx; @@ -753,7 +753,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi) struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); int ret = 0; - cpuc->throttle_ctrl = hw_perf_save_disable(); + cpuc->throttle_ctrl = intel_pmu_save_disable_all(); status = intel_pmu_get_status(cpuc->throttle_ctrl); if (!status) @@ -770,7 +770,7 @@ again: if (!counter) continue; - perf_save_and_restart(counter); + intel_pmu_save_and_restart(counter); if (perf_counter_overflow(counter, nmi, regs, 0)) __x86_pmu_disable(counter, &counter->hw, bit); } @@ -788,7 +788,7 @@ out: * Restore - do not reenable when global enable is off or throttled: */ if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) - hw_perf_restore(cpuc->throttle_ctrl); + intel_pmu_restore_all(cpuc->throttle_ctrl); return ret; } -- cgit v0.10.2 From 72eae04d3a3075c26d39e1e685acfc8e8c29db64 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:10 +0200 Subject: perf_counter, x86: modify initialization of struct x86_pmu This patch adds an error handler and changes initialization of struct x86_pmu. No functional changes. Needed for follow-on patches. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-14-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 5a52d73..7c72a94 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -913,7 +913,7 @@ static struct x86_pmu amd_pmu = { .max_events = ARRAY_SIZE(amd_perfmon_event_map), }; -static struct x86_pmu *intel_pmu_init(void) +static int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -921,7 +921,7 @@ static struct x86_pmu *intel_pmu_init(void) unsigned int ebx; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return NULL; + return -ENODEV; /* * Check whether the Architectural PerfMon supports @@ -929,49 +929,54 @@ static struct x86_pmu *intel_pmu_init(void) */ cpuid(10, &eax.full, &ebx, &unused, &edx.full); if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) - return NULL; + return -ENODEV; intel_perfmon_version = eax.split.version_id; if (intel_perfmon_version < 2) - return NULL; + return -ENODEV; pr_info("Intel Performance Monitoring support detected.\n"); pr_info("... version: %d\n", intel_perfmon_version); pr_info("... bit width: %d\n", eax.split.bit_width); pr_info("... mask length: %d\n", eax.split.mask_length); + x86_pmu = &intel_pmu; + nr_counters_generic = eax.split.num_counters; nr_counters_fixed = edx.split.num_counters_fixed; counter_value_mask = (1ULL << eax.split.bit_width) - 1; - return &intel_pmu; + return 0; } -static struct x86_pmu *amd_pmu_init(void) +static int amd_pmu_init(void) { + x86_pmu = &amd_pmu; + nr_counters_generic = 4; nr_counters_fixed = 0; counter_value_mask = 0x0000FFFFFFFFFFFFULL; counter_value_bits = 48; pr_info("AMD Performance Monitoring support detected.\n"); - - return &amd_pmu; + return 0; } void __init init_hw_perf_counters(void) { + int err; + switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_INTEL: - x86_pmu = intel_pmu_init(); + err = intel_pmu_init(); break; case X86_VENDOR_AMD: - x86_pmu = amd_pmu_init(); + err = amd_pmu_init(); break; default: return; } - if (!x86_pmu) + if (err != 0) return; pr_info("... num counters: %d\n", nr_counters_generic); -- cgit v0.10.2 From 4a06bd8508f65ad1dd5cd2046b85694813fa36a2 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:11 +0200 Subject: perf_counter, x86: make x86_pmu data a static struct Instead of using a pointer to reference to the x86 pmu we now have one single data structure that is initialized at the beginning. This saves the pointer access when using this memory. [ Impact: micro-optimization ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-15-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 7c72a94..68597d7 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -60,7 +60,7 @@ struct x86_pmu { int max_events; }; -static struct x86_pmu *x86_pmu __read_mostly; +static struct x86_pmu x86_pmu __read_mostly; static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, @@ -184,12 +184,12 @@ static bool reserve_pmc_hardware(void) disable_lapic_nmi_watchdog(); for (i = 0; i < nr_counters_generic; i++) { - if (!reserve_perfctr_nmi(x86_pmu->perfctr + i)) + if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; } for (i = 0; i < nr_counters_generic; i++) { - if (!reserve_evntsel_nmi(x86_pmu->eventsel + i)) + if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } @@ -197,13 +197,13 @@ static bool reserve_pmc_hardware(void) eventsel_fail: for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu->eventsel + i); + release_evntsel_nmi(x86_pmu.eventsel + i); i = nr_counters_generic; perfctr_fail: for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu->perfctr + i); + release_perfctr_nmi(x86_pmu.perfctr + i); if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); @@ -216,8 +216,8 @@ static void release_pmc_hardware(void) int i; for (i = 0; i < nr_counters_generic; i++) { - release_perfctr_nmi(x86_pmu->perfctr + i); - release_evntsel_nmi(x86_pmu->eventsel + i); + release_perfctr_nmi(x86_pmu.perfctr + i); + release_evntsel_nmi(x86_pmu.eventsel + i); } if (nmi_watchdog == NMI_LOCAL_APIC) @@ -297,14 +297,14 @@ static int __hw_perf_counter_init(struct perf_counter *counter) * Raw event type provide the config in the event structure */ if (perf_event_raw(hw_event)) { - hwc->config |= x86_pmu->raw_event(perf_event_config(hw_event)); + hwc->config |= x86_pmu.raw_event(perf_event_config(hw_event)); } else { - if (perf_event_id(hw_event) >= x86_pmu->max_events) + if (perf_event_id(hw_event) >= x86_pmu.max_events) return -EINVAL; /* * The generic map: */ - hwc->config |= x86_pmu->event_map(perf_event_id(hw_event)); + hwc->config |= x86_pmu.event_map(perf_event_id(hw_event)); } counter->destroy = hw_perf_counter_destroy; @@ -356,7 +356,7 @@ u64 hw_perf_save_disable(void) if (unlikely(!perf_counters_initialized)) return 0; - return x86_pmu->save_disable_all(); + return x86_pmu.save_disable_all(); } /* * Exported because of ACPI idle @@ -396,7 +396,7 @@ void hw_perf_restore(u64 ctrl) if (unlikely(!perf_counters_initialized)) return; - x86_pmu->restore_all(ctrl); + x86_pmu.restore_all(ctrl); } /* * Exported because of ACPI idle @@ -441,7 +441,7 @@ static void hw_perf_enable(int idx, u64 config) if (unlikely(!perf_counters_initialized)) return; - x86_pmu->enable(idx, config); + x86_pmu.enable(idx, config); } static void intel_pmu_disable_counter(int idx, u64 config) @@ -463,7 +463,7 @@ static void hw_perf_disable(int idx, u64 config) if (unlikely(!perf_counters_initialized)) return; - x86_pmu->disable(idx, config); + x86_pmu.disable(idx, config); } static inline void @@ -580,11 +580,11 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == x86_pmu->event_map(PERF_COUNT_INSTRUCTIONS))) + if (unlikely(event == x86_pmu.event_map(PERF_COUNT_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(event == x86_pmu->event_map(PERF_COUNT_CPU_CYCLES))) + if (unlikely(event == x86_pmu.event_map(PERF_COUNT_CPU_CYCLES))) return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(event == x86_pmu->event_map(PERF_COUNT_BUS_CYCLES))) + if (unlikely(event == x86_pmu.event_map(PERF_COUNT_BUS_CYCLES))) return X86_PMC_IDX_FIXED_BUS_CYCLES; return -1; @@ -628,8 +628,8 @@ try_generic: set_bit(idx, cpuc->used); hwc->idx = idx; } - hwc->config_base = x86_pmu->eventsel; - hwc->counter_base = x86_pmu->perfctr; + hwc->config_base = x86_pmu.eventsel; + hwc->counter_base = x86_pmu.perfctr; } perf_counters_lapic_init(hwc->nmi); @@ -677,8 +677,8 @@ void perf_counter_print_debug(void) pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); for (idx = 0; idx < nr_counters_generic; idx++) { - rdmsrl(x86_pmu->eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu->perfctr + idx, pmc_count); + rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); + rdmsrl(x86_pmu.perfctr + idx, pmc_count); prev_left = per_cpu(prev_left[idx], cpu); @@ -819,7 +819,7 @@ void smp_perf_counter_interrupt(struct pt_regs *regs) irq_enter(); apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); ack_APIC_irq(); - x86_pmu->handle_irq(regs, 0); + x86_pmu.handle_irq(regs, 0); irq_exit(); } @@ -876,7 +876,7 @@ perf_counter_nmi_handler(struct notifier_block *self, regs = args->regs; apic_write(APIC_LVTPC, APIC_DM_NMI); - ret = x86_pmu->handle_irq(regs, 1); + ret = x86_pmu.handle_irq(regs, 1); return ret ? NOTIFY_STOP : NOTIFY_OK; } @@ -940,7 +940,7 @@ static int intel_pmu_init(void) pr_info("... bit width: %d\n", eax.split.bit_width); pr_info("... mask length: %d\n", eax.split.mask_length); - x86_pmu = &intel_pmu; + x86_pmu = intel_pmu; nr_counters_generic = eax.split.num_counters; nr_counters_fixed = edx.split.num_counters_fixed; @@ -951,7 +951,7 @@ static int intel_pmu_init(void) static int amd_pmu_init(void) { - x86_pmu = &amd_pmu; + x86_pmu = amd_pmu; nr_counters_generic = 4; nr_counters_fixed = 0; -- cgit v0.10.2 From 0933e5c6a680ba8d8d786a6f7fa377b7ec0d1e49 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:12 +0200 Subject: perf_counter, x86: move counter parameters to struct x86_pmu [ Impact: refactor and generalize code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-16-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 68597d7..75dbb1f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -24,16 +24,7 @@ #include static bool perf_counters_initialized __read_mostly; - -/* - * Number of (generic) HW counters: - */ -static int nr_counters_generic __read_mostly; static u64 perf_counter_mask __read_mostly; -static u64 counter_value_mask __read_mostly; -static int counter_value_bits __read_mostly; - -static int nr_counters_fixed __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; @@ -58,6 +49,10 @@ struct x86_pmu { u64 (*event_map)(int); u64 (*raw_event)(u64); int max_events; + int num_counters; + int num_counters_fixed; + int counter_bits; + u64 counter_mask; }; static struct x86_pmu x86_pmu __read_mostly; @@ -183,12 +178,12 @@ static bool reserve_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) disable_lapic_nmi_watchdog(); - for (i = 0; i < nr_counters_generic; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; } - for (i = 0; i < nr_counters_generic; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } @@ -199,7 +194,7 @@ eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); - i = nr_counters_generic; + i = x86_pmu.num_counters; perfctr_fail: for (i--; i >= 0; i--) @@ -215,7 +210,7 @@ static void release_pmc_hardware(void) { int i; - for (i = 0; i < nr_counters_generic; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } @@ -336,7 +331,7 @@ static u64 amd_pmu_save_disable_all(void) */ barrier(); - for (idx = 0; idx < nr_counters_generic; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -378,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl) if (!ctrl) return; - for (idx = 0; idx < nr_counters_generic; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -527,7 +522,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, atomic64_set(&hwc->prev_count, (u64)-left); err = checking_wrmsrl(hwc->counter_base + idx, - (u64)(-left) & counter_value_mask); + (u64)(-left) & x86_pmu.counter_mask); } static inline void @@ -621,8 +616,9 @@ static int x86_pmu_enable(struct perf_counter *counter) /* Try to get the previous generic counter again */ if (test_and_set_bit(idx, cpuc->used)) { try_generic: - idx = find_first_zero_bit(cpuc->used, nr_counters_generic); - if (idx == nr_counters_generic) + idx = find_first_zero_bit(cpuc->used, + x86_pmu.num_counters); + if (idx == x86_pmu.num_counters) return -EAGAIN; set_bit(idx, cpuc->used); @@ -654,7 +650,7 @@ void perf_counter_print_debug(void) struct cpu_hw_counters *cpuc; int cpu, idx; - if (!nr_counters_generic) + if (!x86_pmu.num_counters) return; local_irq_disable(); @@ -676,7 +672,7 @@ void perf_counter_print_debug(void) } pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); - for (idx = 0; idx < nr_counters_generic; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); rdmsrl(x86_pmu.perfctr + idx, pmc_count); @@ -689,7 +685,7 @@ void perf_counter_print_debug(void) pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } - for (idx = 0; idx < nr_counters_fixed; idx++) { + for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", @@ -911,6 +907,9 @@ static struct x86_pmu amd_pmu = { .event_map = amd_pmu_event_map, .raw_event = amd_pmu_raw_event, .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_counters = 4, + .counter_bits = 48, + .counter_mask = (1ULL << 48) - 1, }; static int intel_pmu_init(void) @@ -941,10 +940,10 @@ static int intel_pmu_init(void) pr_info("... mask length: %d\n", eax.split.mask_length); x86_pmu = intel_pmu; - - nr_counters_generic = eax.split.num_counters; - nr_counters_fixed = edx.split.num_counters_fixed; - counter_value_mask = (1ULL << eax.split.bit_width) - 1; + x86_pmu.num_counters = eax.split.num_counters; + x86_pmu.num_counters_fixed = edx.split.num_counters_fixed; + x86_pmu.counter_bits = eax.split.bit_width; + x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; return 0; } @@ -952,12 +951,6 @@ static int intel_pmu_init(void) static int amd_pmu_init(void) { x86_pmu = amd_pmu; - - nr_counters_generic = 4; - nr_counters_fixed = 0; - counter_value_mask = 0x0000FFFFFFFFFFFFULL; - counter_value_bits = 48; - pr_info("AMD Performance Monitoring support detected.\n"); return 0; } @@ -979,25 +972,26 @@ void __init init_hw_perf_counters(void) if (err != 0) return; - pr_info("... num counters: %d\n", nr_counters_generic); - if (nr_counters_generic > X86_PMC_MAX_GENERIC) { - nr_counters_generic = X86_PMC_MAX_GENERIC; + pr_info("... num counters: %d\n", x86_pmu.num_counters); + if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { + x86_pmu.num_counters = X86_PMC_MAX_GENERIC; WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - nr_counters_generic, X86_PMC_MAX_GENERIC); + x86_pmu.num_counters, X86_PMC_MAX_GENERIC); } - perf_counter_mask = (1 << nr_counters_generic) - 1; - perf_max_counters = nr_counters_generic; + perf_counter_mask = (1 << x86_pmu.num_counters) - 1; + perf_max_counters = x86_pmu.num_counters; - pr_info("... value mask: %016Lx\n", counter_value_mask); + pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); - if (nr_counters_fixed > X86_PMC_MAX_FIXED) { - nr_counters_fixed = X86_PMC_MAX_FIXED; + if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { + x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - nr_counters_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); } - pr_info("... fixed counters: %d\n", nr_counters_fixed); + pr_info("... fixed counters: %d\n", x86_pmu.num_counters_fixed); - perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED; + perf_counter_mask |= + ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; pr_info("... counter mask: %016Lx\n", perf_counter_mask); perf_counters_initialized = true; -- cgit v0.10.2 From faa28ae018ed004a22aa4a7704e04ccdde4a941e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:13 +0200 Subject: perf_counter, x86: make pmu version generic This makes the use of the version variable generic. Also, some debug messages have been generalized. [ Impact: refactor and generalize code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-17-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 75dbb1f..15d2c03 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -39,6 +39,8 @@ struct cpu_hw_counters { * struct x86_pmu - generic x86 pmu */ struct x86_pmu { + const char *name; + int version; int (*handle_irq)(struct pt_regs *, int); u64 (*save_disable_all)(void); void (*restore_all)(u64); @@ -61,8 +63,6 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, }; -static __read_mostly int intel_perfmon_version; - /* * Intel PerfMon v3. Used on Core2 and later. */ @@ -658,7 +658,7 @@ void perf_counter_print_debug(void) cpu = smp_processor_id(); cpuc = &per_cpu(cpu_hw_counters, cpu); - if (intel_perfmon_version >= 2) { + if (x86_pmu.version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); @@ -884,6 +884,7 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { }; static struct x86_pmu intel_pmu = { + .name = "Intel", .handle_irq = intel_pmu_handle_irq, .save_disable_all = intel_pmu_save_disable_all, .restore_all = intel_pmu_restore_all, @@ -897,6 +898,7 @@ static struct x86_pmu intel_pmu = { }; static struct x86_pmu amd_pmu = { + .name = "AMD", .handle_irq = amd_pmu_handle_irq, .save_disable_all = amd_pmu_save_disable_all, .restore_all = amd_pmu_restore_all, @@ -918,6 +920,7 @@ static int intel_pmu_init(void) union cpuid10_eax eax; unsigned int unused; unsigned int ebx; + int version; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return -ENODEV; @@ -930,16 +933,12 @@ static int intel_pmu_init(void) if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) return -ENODEV; - intel_perfmon_version = eax.split.version_id; - if (intel_perfmon_version < 2) + version = eax.split.version_id; + if (version < 2) return -ENODEV; - pr_info("Intel Performance Monitoring support detected.\n"); - pr_info("... version: %d\n", intel_perfmon_version); - pr_info("... bit width: %d\n", eax.split.bit_width); - pr_info("... mask length: %d\n", eax.split.mask_length); - x86_pmu = intel_pmu; + x86_pmu.version = version; x86_pmu.num_counters = eax.split.num_counters; x86_pmu.num_counters_fixed = edx.split.num_counters_fixed; x86_pmu.counter_bits = eax.split.bit_width; @@ -951,7 +950,6 @@ static int intel_pmu_init(void) static int amd_pmu_init(void) { x86_pmu = amd_pmu; - pr_info("AMD Performance Monitoring support detected.\n"); return 0; } @@ -972,6 +970,10 @@ void __init init_hw_perf_counters(void) if (err != 0) return; + pr_info("%s Performance Monitoring support detected.\n", x86_pmu.name); + pr_info("... version: %d\n", x86_pmu.version); + pr_info("... bit width: %d\n", x86_pmu.counter_bits); + pr_info("... num counters: %d\n", x86_pmu.num_counters); if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { x86_pmu.num_counters = X86_PMC_MAX_GENERIC; -- cgit v0.10.2 From bb775fc2d1dcd1aa6eafde37a8289ba2d80783aa Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:14 +0200 Subject: perf_counter, x86: make x86_pmu_read() static inline [ Impact: micro-optimization ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-18-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 15d2c03..3f3ae47 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1002,7 +1002,7 @@ void __init init_hw_perf_counters(void) register_die_notifier(&perf_counter_nmi_notifier); } -static void x86_pmu_read(struct perf_counter *counter) +static inline void x86_pmu_read(struct perf_counter *counter) { x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); } -- cgit v0.10.2 From 93904966934193204ad08e951f806d5631c29eb3 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:15 +0200 Subject: perf_counter, x86: rename cpuc->active_mask This is to have a consistent naming scheme with cpuc->used. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-19-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 3f3ae47..9ec51a6 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -29,9 +29,9 @@ static u64 perf_counter_mask __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; u64 throttle_ctrl; - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; }; @@ -334,7 +334,7 @@ static u64 amd_pmu_save_disable_all(void) for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active)) continue; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) @@ -376,7 +376,7 @@ static void amd_pmu_restore_all(u64 ctrl) for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active)) continue; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) @@ -424,7 +424,7 @@ static void amd_pmu_enable_counter(int idx, u64 config) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - set_bit(idx, cpuc->active_mask); + set_bit(idx, cpuc->active); if (cpuc->enabled) config |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -448,7 +448,7 @@ static void amd_pmu_disable_counter(int idx, u64 config) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - clear_bit(idx, cpuc->active_mask); + clear_bit(idx, cpuc->active); wrmsrl(MSR_K7_EVNTSEL0 + idx, config); } -- cgit v0.10.2 From 095342389e2ed8deed07b3076f990260ce3c7c9f Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:16 +0200 Subject: perf_counter, x86: generic use of cpuc->active cpuc->active will now be used to indicate an enabled counter which implies also valid pointers of cpuc->counters[]. In contrast, cpuc->used only locks the counter, but it can be still uninitialized. [ Impact: refactor and generalize code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-20-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 9ec51a6..f7fd4a3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -424,7 +424,6 @@ static void amd_pmu_enable_counter(int idx, u64 config) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - set_bit(idx, cpuc->active); if (cpuc->enabled) config |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -446,9 +445,6 @@ static void intel_pmu_disable_counter(int idx, u64 config) static void amd_pmu_disable_counter(int idx, u64 config) { - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - clear_bit(idx, cpuc->active); wrmsrl(MSR_K7_EVNTSEL0 + idx, config); } @@ -633,10 +629,7 @@ try_generic: __x86_pmu_disable(counter, hwc, idx); cpuc->counters[idx] = counter; - /* - * Make it visible before enabling the hw: - */ - barrier(); + set_bit(idx, cpuc->active); x86_perf_counter_set_period(counter, hwc, idx); __x86_pmu_enable(counter, hwc, idx); @@ -700,10 +693,13 @@ static void x86_pmu_disable(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; unsigned int idx = hwc->idx; + /* + * Must be done before we disable, otherwise the nmi handler + * could reenable again: + */ + clear_bit(idx, cpuc->active); __x86_pmu_disable(counter, hwc, idx); - clear_bit(idx, cpuc->used); - cpuc->counters[idx] = NULL; /* * Make sure the cleared pointer becomes visible before we * (potentially) free the counter: @@ -715,6 +711,8 @@ static void x86_pmu_disable(struct perf_counter *counter) * that we are disabling: */ x86_perf_counter_update(counter, hwc, idx); + cpuc->counters[idx] = NULL; + clear_bit(idx, cpuc->used); } /* @@ -763,7 +761,7 @@ again: struct perf_counter *counter = cpuc->counters[bit]; clear_bit(bit, (unsigned long *) &status); - if (!counter) + if (!test_bit(bit, cpuc->active)) continue; intel_pmu_save_and_restart(counter); -- cgit v0.10.2 From 6f00cada07bb5da7f751929d3173494dcc5446cc Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:17 +0200 Subject: perf_counter, x86: consistent use of type int for counter index The type of counter index is sometimes implemented as unsigned int. This patch changes this to have a consistent usage of int. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-21-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f7fd4a3..d8beebe 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -459,7 +459,7 @@ static void hw_perf_disable(int idx, u64 config) static inline void __pmc_fixed_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int __idx) + struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; @@ -474,7 +474,7 @@ __pmc_fixed_disable(struct perf_counter *counter, static inline void __x86_pmu_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int idx) + struct hw_perf_counter *hwc, int idx) { if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) __pmc_fixed_disable(counter, hwc, idx); @@ -523,7 +523,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, static inline void __pmc_fixed_enable(struct perf_counter *counter, - struct hw_perf_counter *hwc, unsigned int __idx) + struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; @@ -691,7 +691,7 @@ static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); struct hw_perf_counter *hwc = &counter->hw; - unsigned int idx = hwc->idx; + int idx = hwc->idx; /* * Must be done before we disable, otherwise the nmi handler diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index c3db52d..41aed42 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -318,7 +318,7 @@ struct hw_perf_counter { unsigned long config_base; unsigned long counter_base; int nmi; - unsigned int idx; + int idx; }; union { /* software */ atomic64_t count; -- cgit v0.10.2 From 7c90cc45f89af4dd4617f97d452740ad95b800d5 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:18 +0200 Subject: perf_counter, x86: rework counter enable functions There is vendor specific code in generic x86 code, and there is vendor specific code that could be generic. This patch introduces x86_pmu_enable_counter() for x86 generic code. Fixed counter code for Intel is moved to Intel only functions. In the end, checks and calls via function pointers were reduced to the necessary. Also, the internal function i/f changed. [ Impact: refactor and generalize code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-22-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d8beebe..ae55933 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -44,7 +44,7 @@ struct x86_pmu { int (*handle_irq)(struct pt_regs *, int); u64 (*save_disable_all)(void); void (*restore_all)(u64); - void (*enable)(int, u64); + void (*enable)(struct hw_perf_counter *, int); void (*disable)(int, u64); unsigned eventsel; unsigned perfctr; @@ -414,28 +414,15 @@ static inline void intel_pmu_ack_status(u64 ack) wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static void intel_pmu_enable_counter(int idx, u64 config) +static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, - config | ARCH_PERFMON_EVENTSEL0_ENABLE); -} - -static void amd_pmu_enable_counter(int idx, u64 config) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - config |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - wrmsrl(MSR_K7_EVNTSEL0 + idx, config); -} + int err; -static void hw_perf_enable(int idx, u64 config) -{ if (unlikely(!perf_counters_initialized)) return; - x86_pmu.enable(idx, config); + err = checking_wrmsrl(hwc->config_base + idx, + hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } static void intel_pmu_disable_counter(int idx, u64 config) @@ -522,8 +509,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, } static inline void -__pmc_fixed_enable(struct perf_counter *counter, - struct hw_perf_counter *hwc, int __idx) +intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; @@ -548,14 +534,24 @@ __pmc_fixed_enable(struct perf_counter *counter, err = checking_wrmsrl(hwc->config_base, ctrl_val); } -static void -__x86_pmu_enable(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) +static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) - __pmc_fixed_enable(counter, hwc, idx); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_enable_fixed(hwc, idx); + return; + } + + x86_pmu_enable_counter(hwc, idx); +} + +static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + x86_pmu_enable_counter(hwc, idx); else - hw_perf_enable(idx, hwc->config); + amd_pmu_disable_counter(idx, hwc->config); } static int @@ -632,7 +628,7 @@ try_generic: set_bit(idx, cpuc->active); x86_perf_counter_set_period(counter, hwc, idx); - __x86_pmu_enable(counter, hwc, idx); + x86_pmu.enable(hwc, idx); return 0; } @@ -728,7 +724,7 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter) x86_perf_counter_set_period(counter, hwc, idx); if (counter->state == PERF_COUNTER_STATE_ACTIVE) - __x86_pmu_enable(counter, hwc, idx); + intel_pmu_enable_counter(hwc, idx); } /* -- cgit v0.10.2 From d43698918bd46c71d494555fb92195fbea1fcb6c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:19 +0200 Subject: perf_counter, x86: rework counter disable functions As for the enable function, this patch reworks the disable functions and introduces x86_pmu_disable_counter(). The internal function i/f in struct x86_pmu changed too. [ Impact: refactor and generalize code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-23-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index ae55933..df9012b 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -45,7 +45,7 @@ struct x86_pmu { u64 (*save_disable_all)(void); void (*restore_all)(u64); void (*enable)(struct hw_perf_counter *, int); - void (*disable)(int, u64); + void (*disable)(struct hw_perf_counter *, int); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); @@ -425,28 +425,19 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } -static void intel_pmu_disable_counter(int idx, u64 config) +static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config); -} - -static void amd_pmu_disable_counter(int idx, u64 config) -{ - wrmsrl(MSR_K7_EVNTSEL0 + idx, config); - -} + int err; -static void hw_perf_disable(int idx, u64 config) -{ if (unlikely(!perf_counters_initialized)) return; - x86_pmu.disable(idx, config); + err = checking_wrmsrl(hwc->config_base + idx, + hwc->config); } static inline void -__pmc_fixed_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, int __idx) +intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) { int idx = __idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; @@ -460,13 +451,20 @@ __pmc_fixed_disable(struct perf_counter *counter, } static inline void -__x86_pmu_disable(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) +intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) - __pmc_fixed_disable(counter, hwc, idx); - else - hw_perf_disable(idx, hwc->config); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_disable_fixed(hwc, idx); + return; + } + + x86_pmu_disable_counter(hwc, idx); +} + +static inline void +amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) +{ + x86_pmu_disable_counter(hwc, idx); } static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); @@ -551,7 +549,7 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) if (cpuc->enabled) x86_pmu_enable_counter(hwc, idx); else - amd_pmu_disable_counter(idx, hwc->config); + x86_pmu_disable_counter(hwc, idx); } static int @@ -622,7 +620,7 @@ try_generic: perf_counters_lapic_init(hwc->nmi); - __x86_pmu_disable(counter, hwc, idx); + x86_pmu.disable(hwc, idx); cpuc->counters[idx] = counter; set_bit(idx, cpuc->active); @@ -694,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter) * could reenable again: */ clear_bit(idx, cpuc->active); - __x86_pmu_disable(counter, hwc, idx); + x86_pmu.disable(hwc, idx); /* * Make sure the cleared pointer becomes visible before we @@ -762,7 +760,7 @@ again: intel_pmu_save_and_restart(counter); if (perf_counter_overflow(counter, nmi, regs, 0)) - __x86_pmu_disable(counter, &counter->hw, bit); + intel_pmu_disable_counter(&counter->hw, bit); } intel_pmu_ack_status(ack); -- cgit v0.10.2 From 85cf9dba92152bb4edec118b2f4f0be1ae7fdcab Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:20 +0200 Subject: perf_counter, x86: change and remove pmu initialization checks Some functions are only called if the pmu was proper initialized. That initalization checks can be removed. The way to check initialization changed too. Now, the pointer to the interrupt handler is checked. If it exists the pmu is initialized. This also removes a static variable and uses struct x86_pmu as only data source for the check. [ Impact: simplify code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-24-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index df9012b..2d3681b 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -23,7 +23,6 @@ #include #include -static bool perf_counters_initialized __read_mostly; static u64 perf_counter_mask __read_mostly; struct cpu_hw_counters { @@ -227,6 +226,11 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) } } +static inline int x86_pmu_initialized(void) +{ + return x86_pmu.handle_irq != NULL; +} + /* * Setup the hardware configuration for a given hw_event_type */ @@ -240,8 +244,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return -ENOSYS; - if (unlikely(!perf_counters_initialized)) - return -EINVAL; + if (!x86_pmu_initialized()) + return -ENODEV; err = 0; if (atomic_inc_not_zero(&num_counters)) { @@ -348,9 +352,8 @@ static u64 amd_pmu_save_disable_all(void) u64 hw_perf_save_disable(void) { - if (unlikely(!perf_counters_initialized)) + if (!x86_pmu_initialized()) return 0; - return x86_pmu.save_disable_all(); } /* @@ -388,9 +391,8 @@ static void amd_pmu_restore_all(u64 ctrl) void hw_perf_restore(u64 ctrl) { - if (unlikely(!perf_counters_initialized)) + if (!x86_pmu_initialized()) return; - x86_pmu.restore_all(ctrl); } /* @@ -402,8 +404,6 @@ static inline u64 intel_pmu_get_status(u64 mask) { u64 status; - if (unlikely(!perf_counters_initialized)) - return 0; rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); return status; @@ -417,10 +417,6 @@ static inline void intel_pmu_ack_status(u64 ack) static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { int err; - - if (unlikely(!perf_counters_initialized)) - return; - err = checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); } @@ -428,10 +424,6 @@ static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { int err; - - if (unlikely(!perf_counters_initialized)) - return; - err = checking_wrmsrl(hwc->config_base + idx, hwc->config); } @@ -787,10 +779,10 @@ void perf_counter_unthrottle(void) { struct cpu_hw_counters *cpuc; - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) + if (!x86_pmu_initialized()) return; - if (unlikely(!perf_counters_initialized)) + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) return; cpuc = &__get_cpu_var(cpu_hw_counters); @@ -829,8 +821,9 @@ void perf_counters_lapic_init(int nmi) { u32 apic_val; - if (!perf_counters_initialized) + if (!x86_pmu_initialized()) return; + /* * Enable the performance counter vector in the APIC LVT: */ @@ -988,7 +981,6 @@ void __init init_hw_perf_counters(void) ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; pr_info("... counter mask: %016Lx\n", perf_counter_mask); - perf_counters_initialized = true; perf_counters_lapic_init(0); register_die_notifier(&perf_counter_nmi_notifier); -- cgit v0.10.2 From a29aa8a7ff93e4196d558036928597e68337dd8d Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:21 +0200 Subject: perf_counter, x86: implement the interrupt handler for AMD cpus This patch implements the interrupt handler for AMD performance counters. In difference to the Intel pmu, there is no single status register and also there are no fixed counters. This makes the handler very different and it is useful to make the handler vendor specific. To check if a counter is overflowed the upper bit of the counter is checked. Only counters where the active bit is set are checked. With this patch throttling is enabled for AMD performance counters. This patch also reenables Linux performance counters on AMD cpus. [ Impact: re-enable perfcounters on AMD CPUs ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-25-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 2d3681b..f4d59d4 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -240,10 +240,6 @@ static int __hw_perf_counter_init(struct perf_counter *counter) struct hw_perf_counter *hwc = &counter->hw; int err; - /* disable temporarily */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return -ENOSYS; - if (!x86_pmu_initialized()) return -ENODEV; @@ -773,7 +769,43 @@ out: return ret; } -static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) { return 0; } +static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) +{ + int cpu = smp_processor_id(); + struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); + u64 val; + int handled = 0; + struct perf_counter *counter; + struct hw_perf_counter *hwc; + int idx; + + ++cpuc->interrupts; + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (!test_bit(idx, cpuc->active)) + continue; + counter = cpuc->counters[idx]; + hwc = &counter->hw; + x86_perf_counter_update(counter, hwc, idx); + val = atomic64_read(&hwc->prev_count); + if (val & (1ULL << (x86_pmu.counter_bits - 1))) + continue; + /* counter overflow */ + x86_perf_counter_set_period(counter, hwc, idx); + handled = 1; + inc_irq_stat(apic_perf_irqs); + if (perf_counter_overflow(counter, nmi, regs, 0)) + amd_pmu_disable_counter(hwc, idx); + else if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) + /* + * do not reenable when throttled, but reload + * the register + */ + amd_pmu_disable_counter(hwc, idx); + else if (counter->state == PERF_COUNTER_STATE_ACTIVE) + amd_pmu_enable_counter(hwc, idx); + } + return handled; +} void perf_counter_unthrottle(void) { @@ -782,9 +814,6 @@ void perf_counter_unthrottle(void) if (!x86_pmu_initialized()) return; - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return; - cpuc = &__get_cpu_var(cpu_hw_counters); if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { if (printk_ratelimit()) -- cgit v0.10.2 From 4b7bfd0d276da3a006d37e85d3cf900d7a14ae2a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:22 +0200 Subject: perf_counter, x86: return raw count with x86_perf_counter_update() To check on AMD cpus if a counter overflows, the upper bit of the raw counter value must be checked. This value is already internally available in x86_perf_counter_update(). Now, the value is returned so that it can be used directly to check for overflows. [ Impact: micro-optimization ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-26-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f4d59d4..a8a53ab 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -132,7 +132,7 @@ static u64 amd_pmu_raw_event(u64 event) * Can only be executed on the CPU where the counter is active. * Returns the delta events processed. */ -static void +static u64 x86_perf_counter_update(struct perf_counter *counter, struct hw_perf_counter *hwc, int idx) { @@ -165,6 +165,8 @@ again: atomic64_add(delta, &counter->count); atomic64_sub(delta, &hwc->period_left); + + return new_raw_count; } static atomic_t num_counters; @@ -785,8 +787,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) continue; counter = cpuc->counters[idx]; hwc = &counter->hw; - x86_perf_counter_update(counter, hwc, idx); - val = atomic64_read(&hwc->prev_count); + val = x86_perf_counter_update(counter, hwc, idx); if (val & (1ULL << (x86_pmu.counter_bits - 1))) continue; /* counter overflow */ -- cgit v0.10.2 From c619b8ffb1cec6a431687a35695dc6fd292a79e6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:23 +0200 Subject: perf_counter, x86: introduce max_period variable In x86 pmus the allowed counter period to programm differs. This introduces a max_period value and allows the generic implementation for all models to check the max period. [ Impact: generalize code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-27-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a8a53ab..4b8715b 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -54,6 +54,7 @@ struct x86_pmu { int num_counters_fixed; int counter_bits; u64 counter_mask; + u64 max_period; }; static struct x86_pmu x86_pmu __read_mostly; @@ -279,14 +280,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->nmi = 1; hwc->irq_period = hw_event->irq_period; - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic counter period: - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF) - hwc->irq_period = 0x7FFFFFFF; + if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period) + hwc->irq_period = x86_pmu.max_period; atomic64_set(&hwc->period_left, hwc->irq_period); @@ -910,6 +905,12 @@ static struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .raw_event = intel_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic counter period: + */ + .max_period = (1ULL << 31) - 1, }; static struct x86_pmu amd_pmu = { @@ -927,6 +928,8 @@ static struct x86_pmu amd_pmu = { .num_counters = 4, .counter_bits = 48, .counter_mask = (1ULL << 48) - 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, }; static int intel_pmu_init(void) @@ -999,6 +1002,7 @@ void __init init_hw_perf_counters(void) perf_max_counters = x86_pmu.num_counters; pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); + pr_info("... max period: %016Lx\n", x86_pmu.max_period); if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; -- cgit v0.10.2 From ef7b3e09ffdcd5200aea9523f6b56d331d1c4fc0 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:24 +0200 Subject: perf_counter, x86: remove vendor check in fixed_mode_idx() The function fixed_mode_idx() is used generically. Now it checks the num_counters_fixed value instead of the vendor to decide if fixed counters are present. [ Impact: generalize code ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-28-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 4b8715b..d1c8036 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -542,7 +542,7 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { unsigned int event; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + if (!x86_pmu.num_counters_fixed) return -1; if (unlikely(hwc->nmi)) -- cgit v0.10.2 From 19d84dab55a383d75c885b5c1a618f5ead96f2f6 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:25 +0200 Subject: perf_counter, x86: remove unused function argument in intel_pmu_get_status() The mask argument is unused and thus can be removed. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-29-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index d1c8036..856b0b8 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -393,7 +393,7 @@ void hw_perf_restore(u64 ctrl) */ EXPORT_SYMBOL_GPL(hw_perf_restore); -static inline u64 intel_pmu_get_status(u64 mask) +static inline u64 intel_pmu_get_status(void) { u64 status; @@ -728,7 +728,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi) cpuc->throttle_ctrl = intel_pmu_save_disable_all(); - status = intel_pmu_get_status(cpuc->throttle_ctrl); + status = intel_pmu_get_status(); if (!status) goto out; @@ -753,7 +753,7 @@ again: /* * Repeat if there is more work to be done: */ - status = intel_pmu_get_status(cpuc->throttle_ctrl); + status = intel_pmu_get_status(); if (status) goto again; out: -- cgit v0.10.2 From 38105f0234d4795c77c7c6845916caf3a395c451 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 12:47:26 +0200 Subject: perf_counter: update 'perf top' documentation The documentation about the perf-top build was outdated after perfstat has been implemented. This updates it. [ Impact: update documentation ] Signed-off-by: Robert Richter Cc: Paul Mackerras Acked-by: Peter Zijlstra LKML-Reference: <1241002046-8832-30-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c index 6a276d2..8d28864 100644 --- a/Documentation/perf_counter/builtin-top.c +++ b/Documentation/perf_counter/builtin-top.c @@ -3,7 +3,7 @@ Build with: - cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt + make -C Documentation/perf_counter/ Sample output: -- cgit v0.10.2 From 98144511427c192e4249ff66a3f9debc55c59411 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2009 14:52:50 +0200 Subject: perf_counter: add/update copyrights Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 856b0b8..47e563b 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1,10 +1,11 @@ /* * Performance counter x86 architecture code * - * Copyright(C) 2008 Thomas Gleixner - * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar - * Copyright(C) 2009 Jaswinder Singh Rajput - * Copyright(C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2009 Jaswinder Singh Rajput + * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra * * For licencing details see kernel-base/COPYING */ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 582108a..a95a171 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1,9 +1,9 @@ /* * Performance counter core code * - * Copyright(C) 2008 Thomas Gleixner - * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar - * + * Copyright (C) 2008 Thomas Gleixner + * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra * * For licensing details see kernel-base/COPYING */ -- cgit v0.10.2 From ab7ef2e50a557af92f4f90689f51fadadafc16b2 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 29 Apr 2009 22:38:51 +1000 Subject: perf_counter: powerpc: allow use of limited-function counters POWER5+ and POWER6 have two hardware counters with limited functionality: PMC5 counts instructions completed in run state and PMC6 counts cycles in run state. (Run state is the state when a hardware RUN bit is 1; the idle task clears RUN while waiting for work to do and sets it when there is work to do.) These counters can't be written to by the kernel, can't generate interrupts, and don't obey the freeze conditions. That means we can only use them for per-task counters (where we know we'll always be in run state; we can't put a per-task counter on an idle task), and only if we don't want interrupts and we do want to count in all processor modes. Obviously some counters can't go on a limited hardware counter, but there are also situations where we can only put a counter on a limited hardware counter - if there are already counters on that exclude some processor modes and we want to put on a per-task cycle or instruction counter that doesn't exclude any processor mode, it could go on if it can use a limited hardware counter. To keep track of these constraints, this adds a flags argument to the processor-specific get_alternatives() functions, with three bits defined: one to say that we can accept alternative event codes that go on limited counters, one to say we only want alternatives on limited counters, and one to say that this is a per-task counter and therefore events that are gated by run state are equivalent to those that aren't (e.g. a "cycles" event is equivalent to a "cycles in run state" event). These flags are computed for each counter and stored in the counter->hw.counter_base field (slightly wonky name for what it does, but it was an existing unused field). Since the limited counters don't freeze when we freeze the other counters, we need some special handling to avoid getting skew between things counted on the limited counters and those counted on normal counters. To minimize this skew, if we are using any limited counters, we read PMC5 and PMC6 immediately after setting and clearing the freeze bit. This is done in a single asm in the new write_mmcr0() function. The code here is specific to PMC5 and PMC6 being the limited hardware counters. Being more general (e.g. having a bitmap of limited hardware counter numbers) would have meant more complex code to read the limited counters when freezing and unfreezing the normal counters, with conditional branches, which would have increased the skew. Since it isn't necessary for the code to be more general at this stage, it isn't. This also extends the back-ends for POWER5+ and POWER6 to be able to handle up to 6 counters rather than the 4 they previously handled. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: Robert Richter LKML-Reference: <18936.19035.163066.892208@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h index 9d7ff6d..56d66c3 100644 --- a/arch/powerpc/include/asm/perf_counter.h +++ b/arch/powerpc/include/asm/perf_counter.h @@ -12,6 +12,7 @@ #define MAX_HWCOUNTERS 8 #define MAX_EVENT_ALTERNATIVES 8 +#define MAX_LIMITED_HWCOUNTERS 2 /* * This struct provides the constants and functions needed to @@ -25,8 +26,11 @@ struct power_pmu { int (*compute_mmcr)(unsigned int events[], int n_ev, unsigned int hwc[], u64 mmcr[]); int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); - int (*get_alternatives)(unsigned int event, unsigned int alt[]); + int (*get_alternatives)(unsigned int event, unsigned int flags, + unsigned int alt[]); void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); + int (*limited_pmc_event)(unsigned int event); + int limited_pmc5_6; /* PMC5 and PMC6 have limited function */ int n_generic; int *generic_events; }; @@ -34,6 +38,13 @@ struct power_pmu { extern struct power_pmu *ppmu; /* + * Values for flags to get_alternatives() + */ +#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ +#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ +#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ + +/* * The power_pmu.get_constraint function returns a 64-bit value and * a 64-bit mask that express the constraints between this event and * other events. diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c index d9bbe5e..15cdc8e 100644 --- a/arch/powerpc/kernel/perf_counter.c +++ b/arch/powerpc/kernel/perf_counter.c @@ -23,10 +23,14 @@ struct cpu_hw_counters { int n_percpu; int disabled; int n_added; + int n_limited; + u8 pmcs_enabled; struct perf_counter *counter[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; + unsigned int flags[MAX_HWCOUNTERS]; u64 mmcr[3]; - u8 pmcs_enabled; + struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; + u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; }; DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); @@ -127,7 +131,8 @@ static void write_pmc(int idx, unsigned long val) * and see if any combination of alternative codes is feasible. * The feasible set is returned in event[]. */ -static int power_check_constraints(unsigned int event[], int n_ev) +static int power_check_constraints(unsigned int event[], unsigned int cflags[], + int n_ev) { u64 mask, value, nv; unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; @@ -144,11 +149,15 @@ static int power_check_constraints(unsigned int event[], int n_ev) /* First see if the events will go on as-is */ for (i = 0; i < n_ev; ++i) { - alternatives[i][0] = event[i]; + if ((cflags[i] & PPMU_LIMITED_PMC_REQD) + && !ppmu->limited_pmc_event(event[i])) { + ppmu->get_alternatives(event[i], cflags[i], + alternatives[i]); + event[i] = alternatives[i][0]; + } if (ppmu->get_constraint(event[i], &amasks[i][0], &avalues[i][0])) return -1; - choice[i] = 0; } value = mask = 0; for (i = 0; i < n_ev; ++i) { @@ -166,7 +175,9 @@ static int power_check_constraints(unsigned int event[], int n_ev) if (!ppmu->get_alternatives) return -1; for (i = 0; i < n_ev; ++i) { - n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]); + choice[i] = 0; + n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], + alternatives[i]); for (j = 1; j < n_alt[i]; ++j) ppmu->get_constraint(alternatives[i][j], &amasks[i][j], &avalues[i][j]); @@ -231,28 +242,41 @@ static int power_check_constraints(unsigned int event[], int n_ev) * exclude_{user,kernel,hv} with each other and any previously * added counters. */ -static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) +static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], + int n_prev, int n_new) { - int eu, ek, eh; - int i, n; + int eu = 0, ek = 0, eh = 0; + int i, n, first; struct perf_counter *counter; n = n_prev + n_new; if (n <= 1) return 0; - eu = ctrs[0]->hw_event.exclude_user; - ek = ctrs[0]->hw_event.exclude_kernel; - eh = ctrs[0]->hw_event.exclude_hv; - if (n_prev == 0) - n_prev = 1; - for (i = n_prev; i < n; ++i) { + first = 1; + for (i = 0; i < n; ++i) { + if (cflags[i] & PPMU_LIMITED_PMC_OK) { + cflags[i] &= ~PPMU_LIMITED_PMC_REQD; + continue; + } counter = ctrs[i]; - if (counter->hw_event.exclude_user != eu || - counter->hw_event.exclude_kernel != ek || - counter->hw_event.exclude_hv != eh) + if (first) { + eu = counter->hw_event.exclude_user; + ek = counter->hw_event.exclude_kernel; + eh = counter->hw_event.exclude_hv; + first = 0; + } else if (counter->hw_event.exclude_user != eu || + counter->hw_event.exclude_kernel != ek || + counter->hw_event.exclude_hv != eh) { return -EAGAIN; + } } + + if (eu || ek || eh) + for (i = 0; i < n; ++i) + if (cflags[i] & PPMU_LIMITED_PMC_OK) + cflags[i] |= PPMU_LIMITED_PMC_REQD; + return 0; } @@ -280,6 +304,85 @@ static void power_pmu_read(struct perf_counter *counter) } /* + * On some machines, PMC5 and PMC6 can't be written, don't respect + * the freeze conditions, and don't generate interrupts. This tells + * us if `counter' is using such a PMC. + */ +static int is_limited_pmc(int pmcnum) +{ + return ppmu->limited_pmc5_6 && (pmcnum == 5 || pmcnum == 6); +} + +static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_counter *counter; + u64 val, prev, delta; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + counter = cpuhw->limited_counter[i]; + if (!counter->hw.idx) + continue; + val = (counter->hw.idx == 5) ? pmc5 : pmc6; + prev = atomic64_read(&counter->hw.prev_count); + counter->hw.idx = 0; + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &counter->count); + } +} + +static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, + unsigned long pmc5, unsigned long pmc6) +{ + struct perf_counter *counter; + u64 val; + int i; + + for (i = 0; i < cpuhw->n_limited; ++i) { + counter = cpuhw->limited_counter[i]; + counter->hw.idx = cpuhw->limited_hwidx[i]; + val = (counter->hw.idx == 5) ? pmc5 : pmc6; + atomic64_set(&counter->hw.prev_count, val); + perf_counter_update_userpage(counter); + } +} + +/* + * Since limited counters don't respect the freeze conditions, we + * have to read them immediately after freezing or unfreezing the + * other counters. We try to keep the values from the limited + * counters as consistent as possible by keeping the delay (in + * cycles and instructions) between freezing/unfreezing and reading + * the limited counters as small and consistent as possible. + * Therefore, if any limited counters are in use, we read them + * both, and always in the same order, to minimize variability, + * and do it inside the same asm that writes MMCR0. + */ +static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) +{ + unsigned long pmc5, pmc6; + + if (!cpuhw->n_limited) { + mtspr(SPRN_MMCR0, mmcr0); + return; + } + + /* + * Write MMCR0, then read PMC5 and PMC6 immediately. + */ + asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" + : "=&r" (pmc5), "=&r" (pmc6) + : "r" (mmcr0), "i" (SPRN_MMCR0), + "i" (SPRN_PMC5), "i" (SPRN_PMC6)); + + if (mmcr0 & MMCR0_FC) + freeze_limited_counters(cpuhw, pmc5, pmc6); + else + thaw_limited_counters(cpuhw, pmc5, pmc6); +} + +/* * Disable all counters to prevent PMU interrupts and to allow * counters to be added or removed. */ @@ -321,7 +424,7 @@ u64 hw_perf_save_disable(void) * executed and the PMU has frozen the counters * before we return. */ - mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); + write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); mb(); } local_irq_restore(flags); @@ -342,6 +445,8 @@ void hw_perf_restore(u64 disable) unsigned long val; s64 left; unsigned int hwc_index[MAX_HWCOUNTERS]; + int n_lim; + int idx; if (disable) return; @@ -414,10 +519,18 @@ void hw_perf_restore(u64 disable) /* * Initialize the PMCs for all the new and moved counters. */ + cpuhw->n_limited = n_lim = 0; for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; if (counter->hw.idx) continue; + idx = hwc_index[i] + 1; + if (is_limited_pmc(idx)) { + cpuhw->limited_counter[n_lim] = counter; + cpuhw->limited_hwidx[n_lim] = idx; + ++n_lim; + continue; + } val = 0; if (counter->hw_event.irq_period) { left = atomic64_read(&counter->hw.period_left); @@ -425,15 +538,16 @@ void hw_perf_restore(u64 disable) val = 0x80000000L - left; } atomic64_set(&counter->hw.prev_count, val); - counter->hw.idx = hwc_index[i] + 1; - write_pmc(counter->hw.idx, val); + counter->hw.idx = idx; + write_pmc(idx, val); perf_counter_update_userpage(counter); } + cpuhw->n_limited = n_lim; cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; out_enable: mb(); - mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr[0]); /* * Enable instruction sampling if necessary @@ -448,7 +562,8 @@ void hw_perf_restore(u64 disable) } static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], unsigned int *events) + struct perf_counter *ctrs[], unsigned int *events, + unsigned int *flags) { int n = 0; struct perf_counter *counter; @@ -457,6 +572,7 @@ static int collect_events(struct perf_counter *group, int max_count, if (n >= max_count) return -1; ctrs[n] = group; + flags[n] = group->hw.counter_base; events[n++] = group->hw.config; } list_for_each_entry(counter, &group->sibling_list, list_entry) { @@ -465,6 +581,7 @@ static int collect_events(struct perf_counter *group, int max_count, if (n >= max_count) return -1; ctrs[n] = counter; + flags[n] = counter->hw.counter_base; events[n++] = counter->hw.config; } } @@ -497,12 +614,14 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader, cpuhw = &__get_cpu_var(cpu_hw_counters); n0 = cpuhw->n_counters; n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->counter[n0], &cpuhw->events[n0]); + &cpuhw->counter[n0], &cpuhw->events[n0], + &cpuhw->flags[n0]); if (n < 0) return -EAGAIN; - if (check_excludes(cpuhw->counter, n0, n)) + if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) return -EAGAIN; - if (power_check_constraints(cpuhw->events, n + n0)) + i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); + if (i < 0) return -EAGAIN; cpuhw->n_counters = n0 + n; cpuhw->n_added += n; @@ -554,9 +673,10 @@ static int power_pmu_enable(struct perf_counter *counter) goto out; cpuhw->counter[n0] = counter; cpuhw->events[n0] = counter->hw.config; - if (check_excludes(cpuhw->counter, n0, 1)) + cpuhw->flags[n0] = counter->hw.counter_base; + if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) goto out; - if (power_check_constraints(cpuhw->events, n0 + 1)) + if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) goto out; counter->hw.config = cpuhw->events[n0]; @@ -592,12 +712,24 @@ static void power_pmu_disable(struct perf_counter *counter) cpuhw->counter[i-1] = cpuhw->counter[i]; --cpuhw->n_counters; ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; + if (counter->hw.idx) { + write_pmc(counter->hw.idx, 0); + counter->hw.idx = 0; + } perf_counter_update_userpage(counter); break; } } + for (i = 0; i < cpuhw->n_limited; ++i) + if (counter == cpuhw->limited_counter[i]) + break; + if (i < cpuhw->n_limited) { + while (++i < cpuhw->n_limited) { + cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; + cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; + } + --cpuhw->n_limited; + } if (cpuhw->n_counters == 0) { /* disable exceptions if no counters are running */ cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); @@ -613,6 +745,61 @@ struct pmu power_pmu = { .read = power_pmu_read, }; +/* + * Return 1 if we might be able to put counter on a limited PMC, + * or 0 if not. + * A counter can only go on a limited PMC if it counts something + * that a limited PMC can count, doesn't require interrupts, and + * doesn't exclude any processor mode. + */ +static int can_go_on_limited_pmc(struct perf_counter *counter, unsigned int ev, + unsigned int flags) +{ + int n; + unsigned int alt[MAX_EVENT_ALTERNATIVES]; + + if (counter->hw_event.exclude_user + || counter->hw_event.exclude_kernel + || counter->hw_event.exclude_hv + || counter->hw_event.irq_period) + return 0; + + if (ppmu->limited_pmc_event(ev)) + return 1; + + /* + * The requested event isn't on a limited PMC already; + * see if any alternative code goes on a limited PMC. + */ + if (!ppmu->get_alternatives) + return 0; + + flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; + n = ppmu->get_alternatives(ev, flags, alt); + if (n) + return alt[0]; + + return 0; +} + +/* + * Find an alternative event that goes on a normal PMC, if possible, + * and return the event code, or 0 if there is no such alternative. + * (Note: event code 0 is "don't count" on all machines.) + */ +static unsigned long normal_pmc_alternative(unsigned long ev, + unsigned long flags) +{ + unsigned int alt[MAX_EVENT_ALTERNATIVES]; + int n; + + flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); + n = ppmu->get_alternatives(ev, flags, alt); + if (!n) + return 0; + return alt[0]; +} + /* Number of perf_counters counting hardware events */ static atomic_t num_counters; /* Used to avoid races in calling reserve/release_pmc_hardware */ @@ -633,9 +820,10 @@ static void hw_perf_counter_destroy(struct perf_counter *counter) const struct pmu *hw_perf_counter_init(struct perf_counter *counter) { - unsigned long ev; + unsigned long ev, flags; struct perf_counter *ctrs[MAX_HWCOUNTERS]; unsigned int events[MAX_HWCOUNTERS]; + unsigned int cflags[MAX_HWCOUNTERS]; int n; int err; @@ -661,7 +849,36 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) */ if (!firmware_has_feature(FW_FEATURE_LPAR)) counter->hw_event.exclude_hv = 0; - + + /* + * If this is a per-task counter, then we can use + * PM_RUN_* events interchangeably with their non RUN_* + * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. + * XXX we should check if the task is an idle task. + */ + flags = 0; + if (counter->ctx->task) + flags |= PPMU_ONLY_COUNT_RUN; + + /* + * If this machine has limited counters, check whether this + * event could go on a limited counter. + */ + if (ppmu->limited_pmc5_6) { + if (can_go_on_limited_pmc(counter, ev, flags)) { + flags |= PPMU_LIMITED_PMC_OK; + } else if (ppmu->limited_pmc_event(ev)) { + /* + * The requested event is on a limited PMC, + * but we can't use a limited PMC; see if any + * alternative goes on a normal PMC. + */ + ev = normal_pmc_alternative(ev, flags); + if (!ev) + return ERR_PTR(-EINVAL); + } + } + /* * If this is in a group, check if it can go on with all the * other hardware counters in the group. We assume the counter @@ -670,18 +887,20 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) n = 0; if (counter->group_leader != counter) { n = collect_events(counter->group_leader, ppmu->n_counter - 1, - ctrs, events); + ctrs, events, cflags); if (n < 0) return ERR_PTR(-EINVAL); } events[n] = ev; ctrs[n] = counter; - if (check_excludes(ctrs, n, 1)) + cflags[n] = flags; + if (check_excludes(ctrs, cflags, n, 1)) return ERR_PTR(-EINVAL); - if (power_check_constraints(events, n + 1)) + if (power_check_constraints(events, cflags, n + 1)) return ERR_PTR(-EINVAL); counter->hw.config = events[n]; + counter->hw.counter_base = cflags[n]; atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); /* @@ -763,6 +982,10 @@ static void perf_counter_interrupt(struct pt_regs *regs) int found = 0; int nmi; + if (cpuhw->n_limited) + freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), + mfspr(SPRN_PMC6)); + /* * If interrupts were soft-disabled when this PMU interrupt * occurred, treat it as an NMI. @@ -775,6 +998,8 @@ static void perf_counter_interrupt(struct pt_regs *regs) for (i = 0; i < cpuhw->n_counters; ++i) { counter = cpuhw->counter[i]; + if (is_limited_pmc(counter->hw.idx)) + continue; val = read_pmc(counter->hw.idx); if ((int)val < 0) { /* counter has overflowed */ @@ -791,6 +1016,8 @@ static void perf_counter_interrupt(struct pt_regs *regs) */ if (!found) { for (i = 0; i < ppmu->n_counter; ++i) { + if (is_limited_pmc(i + 1)) + continue; val = read_pmc(i + 1); if ((int)val < 0) write_pmc(i + 1, 0); @@ -804,7 +1031,7 @@ static void perf_counter_interrupt(struct pt_regs *regs) * XXX might want to use MSR.PM to keep the counters frozen until * we get back out of this interrupt. */ - mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); + write_mmcr0(cpuhw, cpuhw->mmcr[0]); if (nmi) nmi_exit(); diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c index 1407b19..744a275 100644 --- a/arch/powerpc/kernel/power4-pmu.c +++ b/arch/powerpc/kernel/power4-pmu.c @@ -320,7 +320,8 @@ static unsigned int ppc_inst_cmpl[] = { 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 }; -static int p4_get_alternatives(unsigned int event, unsigned int alt[]) +static int p4_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { int i, j, na; diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 1222c8e..8154eaa 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -78,8 +78,8 @@ * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 - * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> - * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 + * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> + * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 * * NC - number of counters * 51: NC error 0x0008_0000_0000_0000 @@ -105,18 +105,18 @@ * 30: IDU|GRS events needed 0x00_4000_0000 * * B0 - * 20-23: Byte 0 event source 0x00f0_0000 + * 24-27: Byte 0 event source 0x0f00_0000 * Encoding as for the event code * * B1, B2, B3 - * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources + * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources * - * P4 - * 7: P1 error 0x80 - * 6-7: Count of events needing PMC4 + * P6 + * 11: P6 error 0x800 + * 10-11: Count of events needing PMC6 * - * P1..P3 - * 0-6: Count of events needing PMC1..PMC3 + * P1..P5 + * 0-9: Count of events needing PMC1..PMC5 */ static const int grsel_shift[8] = { @@ -143,11 +143,13 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { - if (pmc > 4) + if (pmc > 6) return -1; sh = (pmc - 1) * 2; mask |= 2 << sh; value |= 1 << sh; + if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) + return -1; } if (event & PM_BUSEVENT_MSK) { unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; @@ -173,16 +175,26 @@ static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; } /* Set byte lane select field */ - mask |= 0xfULL << (20 - 4 * byte); - value |= (u64)unit << (20 - 4 * byte); + mask |= 0xfULL << (24 - 4 * byte); + value |= (u64)unit << (24 - 4 * byte); + } + if (pmc < 5) { + /* need a counter from PMC1-4 set */ + mask |= 0x8000000000000ull; + value |= 0x1000000000000ull; } - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; *maskp = mask; *valp = value; return 0; } +static int power5p_limited_pmc_event(unsigned int event) +{ + int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + + return pmc == 5 || pmc == 6; +} + #define MAX_ALT 3 /* at most 3 alternatives for any event */ static const unsigned int event_alternatives[][MAX_ALT] = { @@ -193,6 +205,7 @@ static const unsigned int event_alternatives[][MAX_ALT] = { { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ + { 0x100005, 0x600005 }, /* PM_RUN_CYC */ { 0x100009, 0x200009 }, /* PM_INST_CMPL */ { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ { 0x300009, 0x400009 }, /* PM_INST_DISP */ @@ -260,24 +273,85 @@ static int find_alternative_bdecode(unsigned int event) return -1; } -static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) +static int power5p_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { int i, j, ae, nalt = 1; + int nlim; alt[0] = event; nalt = 1; + nlim = power5p_limited_pmc_event(event); i = find_alternative(event); if (i >= 0) { for (j = 0; j < MAX_ALT; ++j) { ae = event_alternatives[i][j]; if (ae && ae != event) alt[nalt++] = ae; + nlim += power5p_limited_pmc_event(ae); } } else { ae = find_alternative_bdecode(event); if (ae > 0) alt[nalt++] = ae; } + + if (flags & PPMU_ONLY_COUNT_RUN) { + /* + * We're only counting in RUN state, + * so PM_CYC is equivalent to PM_RUN_CYC + * and PM_INST_CMPL === PM_RUN_INST_CMPL. + * This doesn't include alternatives that don't provide + * any extra flexibility in assigning PMCs (e.g. + * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). + * Note that even with these additional alternatives + * we never end up with more than 3 alternatives for any event. + */ + j = nalt; + for (i = 0; i < nalt; ++i) { + switch (alt[i]) { + case 0xf: /* PM_CYC */ + alt[j++] = 0x600005; /* PM_RUN_CYC */ + ++nlim; + break; + case 0x600005: /* PM_RUN_CYC */ + alt[j++] = 0xf; + break; + case 0x100009: /* PM_INST_CMPL */ + alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ + ++nlim; + break; + case 0x500009: /* PM_RUN_INST_CMPL */ + alt[j++] = 0x100009; /* PM_INST_CMPL */ + alt[j++] = 0x200009; + break; + } + } + nalt = j; + } + + if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { + /* remove the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (!power5p_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { + /* remove all but the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (power5p_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } + return nalt; } @@ -390,7 +464,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, unsigned char unituse[16]; int ttmuse; - if (n_ev > 4) + if (n_ev > 6) return -1; /* First pass to count resource use */ @@ -399,7 +473,7 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, for (i = 0; i < n_ev; ++i) { pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { - if (pmc > 4) + if (pmc > 6) return -1; if (pmc_inuse & (1 << (pmc - 1))) return -1; @@ -488,13 +562,16 @@ static int power5p_compute_mmcr(unsigned int event[], int n_ev, if (pmc >= 4) return -1; pmc_inuse |= 1 << pmc; - } else { + } else if (pmc <= 4) { /* Direct event */ --pmc; if (isbus && (byte & 2) && (psel == 8 || psel == 0x10 || psel == 0x28)) /* add events on higher-numbered bus */ mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); + } else { + /* Instructions or run cycles on PMC5/6 */ + --pmc; } if (isbus && unit == PM_GRS) { bit = psel & 7; @@ -538,7 +615,7 @@ static int power5p_generic_events[] = { }; struct power_pmu power5p_pmu = { - .n_counter = 4, + .n_counter = 6, .max_alternatives = MAX_ALT, .add_fields = 0x7000000000055ull, .test_adder = 0x3000040000000ull, @@ -548,4 +625,6 @@ struct power_pmu power5p_pmu = { .disable_pmc = power5p_disable_pmc, .n_generic = ARRAY_SIZE(power5p_generic_events), .generic_events = power5p_generic_events, + .limited_pmc5_6 = 1, + .limited_pmc_event = power5p_limited_pmc_event, }; diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 116c4bb..6e667dc 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -269,7 +269,8 @@ static int find_alternative_bdecode(unsigned int event) return -1; } -static int power5_get_alternatives(unsigned int event, unsigned int alt[]) +static int power5_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { int i, j, ae, nalt = 1; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index fce1fc2..d44049f 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -182,7 +182,7 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, unsigned int ttmset = 0; unsigned int pmc_inuse = 0; - if (n_ev > 4) + if (n_ev > 6) return -1; for (i = 0; i < n_ev; ++i) { pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; @@ -202,6 +202,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, for (pmc = 0; pmc < 4; ++pmc) if (!(pmc_inuse & (1 << pmc))) break; + if (pmc >= 4) + return -1; pmc_inuse |= 1 << pmc; } hwc[i] = pmc; @@ -240,7 +242,8 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, } if (power6_marked_instr_event(event[i])) mmcra |= MMCRA_SAMPLE_ENABLE; - mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); + if (pmc < 4) + mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); } mmcr[0] = 0; if (pmc_inuse & 1) @@ -256,19 +259,20 @@ static int p6_compute_mmcr(unsigned int event[], int n_ev, * Layout of constraint bits: * * 0-1 add field: number of uses of PMC1 (max 1) - * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 - * 8-10 select field: nest (subunit) event selector + * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 + * 12-15 add field: number of uses of PMC1-4 (max 4) * 16-19 select field: unit on byte 0 of event bus * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 + * 32-34 select field: nest (subunit) event selector */ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) { - int pmc, byte, sh; - unsigned int mask = 0, value = 0; + int pmc, byte, sh, subunit; + u64 mask = 0, value = 0; pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; if (pmc) { - if (pmc > 4) + if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) return -1; sh = (pmc - 1) * 2; mask |= 2 << sh; @@ -276,26 +280,38 @@ static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) } if (event & PM_BUSEVENT_MSK) { byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - sh = byte * 4; + sh = byte * 4 + (16 - PM_UNIT_SH); mask |= PM_UNIT_MSKS << sh; - value |= (event & PM_UNIT_MSKS) << sh; + value |= (u64)(event & PM_UNIT_MSKS) << sh; if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { - mask |= PM_SUBUNIT_MSKS; - value |= event & PM_SUBUNIT_MSKS; + subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; + mask |= (u64)PM_SUBUNIT_MSK << 32; + value |= (u64)subunit << 32; } } + if (pmc <= 4) { + mask |= 0x8000; /* add field for count of PMC1-4 uses */ + value |= 0x1000; + } *maskp = mask; *valp = value; return 0; } +static int p6_limited_pmc_event(unsigned int event) +{ + int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; + + return pmc == 5 || pmc == 6; +} + #define MAX_ALT 4 /* at most 4 alternatives for any event */ static const unsigned int event_alternatives[][MAX_ALT] = { { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ - { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */ + { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */ { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ { 0x10000e, 0x400010 }, /* PM_PURR */ { 0x100010, 0x4000f8 }, /* PM_FLUSH */ @@ -340,13 +356,15 @@ static int find_alternatives_list(unsigned int event) return -1; } -static int p6_get_alternatives(unsigned int event, unsigned int alt[]) +static int p6_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { - int i, j; + int i, j, nlim; unsigned int aevent, psel, pmc; unsigned int nalt = 1; alt[0] = event; + nlim = p6_limited_pmc_event(event); /* check the alternatives table */ i = find_alternatives_list(event); @@ -358,6 +376,7 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[]) break; if (aevent != event) alt[nalt++] = aevent; + nlim += p6_limited_pmc_event(aevent); } } else { @@ -375,13 +394,75 @@ static int p6_get_alternatives(unsigned int event, unsigned int alt[]) ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); } + if (flags & PPMU_ONLY_COUNT_RUN) { + /* + * We're only counting in RUN state, + * so PM_CYC is equivalent to PM_RUN_CYC, + * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. + * This doesn't include alternatives that don't provide + * any extra flexibility in assigning PMCs (e.g. + * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). + * Note that even with these additional alternatives + * we never end up with more than 4 alternatives for any event. + */ + j = nalt; + for (i = 0; i < nalt; ++i) { + switch (alt[i]) { + case 0x1e: /* PM_CYC */ + alt[j++] = 0x600005; /* PM_RUN_CYC */ + ++nlim; + break; + case 0x10000a: /* PM_RUN_CYC */ + alt[j++] = 0x1e; /* PM_CYC */ + break; + case 2: /* PM_INST_CMPL */ + alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ + ++nlim; + break; + case 0x500009: /* PM_RUN_INST_CMPL */ + alt[j++] = 2; /* PM_INST_CMPL */ + break; + case 0x10000e: /* PM_PURR */ + alt[j++] = 0x4000f4; /* PM_RUN_PURR */ + break; + case 0x4000f4: /* PM_RUN_PURR */ + alt[j++] = 0x10000e; /* PM_PURR */ + break; + } + } + nalt = j; + } + + if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { + /* remove the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (!p6_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { + /* remove all but the limited PMC events */ + j = 0; + for (i = 0; i < nalt; ++i) { + if (p6_limited_pmc_event(alt[i])) { + alt[j] = alt[i]; + ++j; + } + } + nalt = j; + } + return nalt; } static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) { /* Set PMCxSEL to 0 to disable PMCx */ - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); + if (pmc <= 3) + mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); } static int power6_generic_events[] = { @@ -394,14 +475,16 @@ static int power6_generic_events[] = { }; struct power_pmu power6_pmu = { - .n_counter = 4, + .n_counter = 6, .max_alternatives = MAX_ALT, - .add_fields = 0x55, - .test_adder = 0, + .add_fields = 0x1555, + .test_adder = 0x3000, .compute_mmcr = p6_compute_mmcr, .get_constraint = p6_get_constraint, .get_alternatives = p6_get_alternatives, .disable_pmc = p6_disable_pmc, .n_generic = ARRAY_SIZE(power6_generic_events), .generic_events = power6_generic_events, + .limited_pmc5_6 = 1, + .limited_pmc_event = p6_limited_pmc_event, }; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index aed8ccd..af2d188 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -243,7 +243,8 @@ static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) return 0; } -static int p970_get_alternatives(unsigned int event, unsigned int alt[]) +static int p970_get_alternatives(unsigned int event, unsigned int flags, + unsigned int alt[]) { alt[0] = event; -- cgit v0.10.2 From f25181f598cf4a8ccc40a51d8b74f8b555ecddee Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 23 Apr 2009 22:18:00 -0400 Subject: xfs_file_last_byte() needs to acquire ilock We had some systems crash with this stack: [] ia64_leave_kernel+0x0/0x280 [] xfs_bmbt_get_startoff+0x0/0x20 [xfs] [] xfs_bmap_last_offset+0x210/0x280 [xfs] [] xfs_file_last_byte+0x70/0x1a0 [xfs] [] xfs_itruncate_start+0xc0/0x1a0 [xfs] [] xfs_inactive_free_eofblocks+0x290/0x460 [xfs] [] xfs_release+0x1b0/0x240 [xfs] [] xfs_file_release+0x70/0xa0 [xfs] [] __fput+0x1a0/0x420 [] fput+0x40/0x60 The problem here is that xfs_file_last_byte() does not acquire the inode lock and can therefore race with another thread that is modifying the extext list. While xfs_bmap_last_offset() is trying to lookup what was the last extent some extents were merged and the extent list shrunk so the index we lookup is now beyond the end of the extent list and potentially in a freed buffer. Signed-off-by: Lachlan McIlroy Reviewed-by: Christoph Hellwig Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e7ae08d..123b20c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1258,8 +1258,10 @@ xfs_file_last_byte( * necessary. */ if (ip->i_df.if_flags & XFS_IFEXTENTS) { + xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); + xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) { last_block = 0; } -- cgit v0.10.2 From 2ac00af7a6d2e65013e6f28bd1f37c0cd98ba134 Mon Sep 17 00:00:00 2001 From: Olaf Weber Date: Fri, 17 Apr 2009 16:12:45 -0500 Subject: xfs: add more checks to superblock validation There had been reports where xfs filesystem was randomly corrupted with fsfuzzer, and xfs failed to handle it gracefully. This patch fixes couple of reported problem by providing additional checks in the superblock validation routine. Signed-off-by: Olaf Weber Reviewed-by: Josef 'Jeff' Sipek Reviewed-by: Christoph Hellwig Signed-off-by: Felix Blyakher diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b101990..65a9972 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -291,14 +291,17 @@ xfs_mount_validate_sb( sbp->sb_sectsize > XFS_MAX_SECTORSIZE || sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || + sbp->sb_sectsize != (1 << sbp->sb_sectlog) || sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || + sbp->sb_blocksize != (1 << sbp->sb_blocklog) || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || + sbp->sb_inodesize != (1 << sbp->sb_inodelog) || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || -- cgit v0.10.2 From b1fca26631f76a5e8b18435a43f5d82b8734da4b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 23 Mar 2009 18:22:09 +0100 Subject: mutex: add atomic_dec_and_mutex_lock() Much like the atomic_dec_and_lock() function in which we take an hold a spin_lock if we drop the atomic to 0 this function takes and holds the mutex if we dec the atomic to 0. Signed-off-by: Eric Paris Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Orig-LKML-Reference: <20090323172417.410913479@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 3069ec7..93054fc 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -151,4 +151,27 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} + #endif -- cgit v0.10.2 From 4be4a00fb55879ef44b5914c189aecffa828deb4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Apr 2009 10:50:48 -0400 Subject: xfs: a couple getbmap cleanups - reshuffle various conditionals for data vs attr fork to make the code more readable - do fine-grainded goto-based error handling - exit early from conditionals instead of keeping a long else branch around - allow kmem_alloc to fail Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3a6ed42..abe4244 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5880,7 +5880,7 @@ xfs_getbmap( void *arg) /* formatter arg */ { __int64_t bmvend; /* last block requested */ - int error; /* return value */ + int error = 0; /* return value */ __int64_t fixlen; /* length for -1 case */ int i; /* extent number */ int lock; /* lock state */ @@ -5899,30 +5899,8 @@ xfs_getbmap( mp = ip->i_mount; iflags = bmv->bmv_iflags; - whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; - /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not - * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ - * bit is set for the file, generate a read event in order - * that the DMAPI application may do its thing before we return - * the extents. Usually this means restoring user file data to - * regions of the file that look like holes. - * - * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify - * BMV_IF_NO_DMAPI_READ so that read events are generated. - * If this were not true, callers of ioctl( XFS_IOC_GETBMAP ) - * could misinterpret holes in a DMAPI file as true holes, - * when in fact they may represent offline user data. - */ - if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 && - DM_EVENT_ENABLED(ip, DM_EVENT_READ) && - whichfork == XFS_DATA_FORK) { - error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); - if (error) - return XFS_ERROR(error); - } - if (whichfork == XFS_ATTR_FORK) { if (XFS_IFORK_Q(ip)) { if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && @@ -5936,11 +5914,37 @@ xfs_getbmap( ip->i_mount); return XFS_ERROR(EFSCORRUPTED); } - } else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_format != XFS_DINODE_FMT_BTREE && - ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) - return XFS_ERROR(EINVAL); - if (whichfork == XFS_DATA_FORK) { + + prealloced = 0; + fixlen = 1LL << 32; + } else { + /* + * If the BMV_IF_NO_DMAPI_READ interface bit specified, do + * not generate a DMAPI read event. Otherwise, if the + * DM_EVENT_READ bit is set for the file, generate a read + * event in order that the DMAPI application may do its thing + * before we return the extents. Usually this means restoring + * user file data to regions of the file that look like holes. + * + * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify + * BMV_IF_NO_DMAPI_READ so that read events are generated. + * If this were not true, callers of ioctl(XFS_IOC_GETBMAP) + * could misinterpret holes in a DMAPI file as true holes, + * when in fact they may represent offline user data. + */ + if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && + !(iflags & BMV_IF_NO_DMAPI_READ)) { + error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, + 0, 0, 0, NULL); + if (error) + return XFS_ERROR(error); + } + + if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_format != XFS_DINODE_FMT_BTREE && + ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) + return XFS_ERROR(EINVAL); + if (xfs_get_extsz_hint(ip) || ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ prealloced = 1; @@ -5949,42 +5953,34 @@ xfs_getbmap( prealloced = 0; fixlen = ip->i_size; } - } else { - prealloced = 0; - fixlen = 1LL << 32; } if (bmv->bmv_length == -1) { fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); - bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset), - (__int64_t)0); - } else if (bmv->bmv_length < 0) - return XFS_ERROR(EINVAL); - if (bmv->bmv_length == 0) { + bmv->bmv_length = + max_t(__int64_t, fixlen - bmv->bmv_offset, 0); + } else if (bmv->bmv_length == 0) { bmv->bmv_entries = 0; return 0; + } else if (bmv->bmv_length < 0) { + return XFS_ERROR(EINVAL); } + nex = bmv->bmv_count - 1; if (nex <= 0) return XFS_ERROR(EINVAL); bmvend = bmv->bmv_offset + bmv->bmv_length; xfs_ilock(ip, XFS_IOLOCK_SHARED); - - if (((iflags & BMV_IF_DELALLOC) == 0) && - (whichfork == XFS_DATA_FORK) && - (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { - /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ - error = xfs_flush_pages(ip, (xfs_off_t)0, - -1, 0, FI_REMAPF); - if (error) { - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return error; + if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { + if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) { + error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF); + if (error) + goto out_unlock_iolock; } - } - ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) || - ip->i_delayed_blks == 0); + ASSERT(ip->i_delayed_blks == 0); + } lock = xfs_ilock_map_shared(ip); @@ -5995,23 +5991,25 @@ xfs_getbmap( if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; - bmapi_flags = xfs_bmapi_aflag(whichfork) | - ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE); + bmapi_flags = xfs_bmapi_aflag(whichfork); + if (!(iflags & BMV_IF_PREALLOC)) + bmapi_flags |= XFS_BMAPI_IGSTATE; /* * Allocate enough space to handle "subnex" maps at a time. */ + error = ENOMEM; subnex = 16; - map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP); + map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); + if (!map) + goto out_unlock_ilock; bmv->bmv_entries = 0; - if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) { - if (((iflags & BMV_IF_DELALLOC) == 0) || - whichfork == XFS_ATTR_FORK) { - error = 0; - goto unlock_and_return; - } + if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && + (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { + error = 0; + goto out_free_map; } nexleft = nex; @@ -6023,10 +6021,12 @@ xfs_getbmap( bmapi_flags, NULL, 0, map, &nmap, NULL, NULL); if (error) - goto unlock_and_return; + goto out_free_map; ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { + int full = 0; /* user array is full */ + out.bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) out.bmv_oflags |= BMV_OF_PREALLOC; @@ -6041,36 +6041,32 @@ xfs_getbmap( whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ out.bmv_oflags |= BMV_OF_LAST; - goto unlock_and_return; - } else { - int full = 0; /* user array is full */ - - if (!xfs_getbmapx_fix_eof_hole(ip, &out, - prealloced, bmvend, - map[i].br_startblock)) { - goto unlock_and_return; - } - - /* format results & advance arg */ - error = formatter(&arg, &out, &full); - if (error || full) - goto unlock_and_return; - nexleft--; - bmv->bmv_offset = - out.bmv_offset + out.bmv_length; - bmv->bmv_length = MAX((__int64_t)0, - (__int64_t)(bmvend - bmv->bmv_offset)); - bmv->bmv_entries++; + goto out_free_map; } + + if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced, + bmvend, map[i].br_startblock)) + goto out_free_map; + + /* format results & advance arg */ + error = formatter(&arg, &out, &full); + if (error || full) + goto out_free_map; + nexleft--; + bmv->bmv_offset = + out.bmv_offset + out.bmv_length; + bmv->bmv_length = + max_t(__int64_t, 0, bmvend - bmv->bmv_offset); + bmv->bmv_entries++; } } while (nmap && nexleft && bmv->bmv_length); -unlock_and_return: + out_free_map: + kmem_free(map); + out_unlock_ilock: xfs_iunlock_map_shared(ip, lock); + out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - kmem_free(map); - return error; } -- cgit v0.10.2 From 6321e3ed2acf3ee9643cdd403e1c88605d7944ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Feb 2009 08:39:02 -0500 Subject: xfs: fix getbmap vs mmap deadlock xfs_getbmap (or rather the formatters called by it) copy out the getbmap structures under the ilock, which can deadlock against mmap. This has been reported via bugzilla a while ago (#717) and has recently also shown up via lockdep. So allocate a temporary buffer to format the kernel getbmap structures into and then copy them out after dropping the locks. A little problem with this is that we limit the number of extents we can copy out by the maximum allocation size, but I see no real way around that. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Reviewed-by: Felix Blyakher Signed-off-by: Felix Blyakher diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index abe4244..ca7c600 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5890,12 +5890,13 @@ xfs_getbmap( int nexleft; /* # of user extents left */ int subnex; /* # of bmapi's can do */ int nmap; /* number of map entries */ - struct getbmapx out; /* output structure */ + struct getbmapx *out; /* output structure */ int whichfork; /* data or attr fork */ int prealloced; /* this is a file with * preallocated data space */ int iflags; /* interface flags */ int bmapi_flags; /* flags for xfs_bmapi */ + int cur_ext = 0; mp = ip->i_mount; iflags = bmv->bmv_iflags; @@ -5971,6 +5972,13 @@ xfs_getbmap( return XFS_ERROR(EINVAL); bmvend = bmv->bmv_offset + bmv->bmv_length; + + if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) + return XFS_ERROR(ENOMEM); + out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); + if (!out) + return XFS_ERROR(ENOMEM); + xfs_ilock(ip, XFS_IOLOCK_SHARED); if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) { @@ -6025,39 +6033,39 @@ xfs_getbmap( ASSERT(nmap <= subnex); for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { - int full = 0; /* user array is full */ - - out.bmv_oflags = 0; + out[cur_ext].bmv_oflags = 0; if (map[i].br_state == XFS_EXT_UNWRITTEN) - out.bmv_oflags |= BMV_OF_PREALLOC; + out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; else if (map[i].br_startblock == DELAYSTARTBLOCK) - out.bmv_oflags |= BMV_OF_DELALLOC; - out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); - out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - out.bmv_unused1 = out.bmv_unused2 = 0; + out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; + out[cur_ext].bmv_offset = + XFS_FSB_TO_BB(mp, map[i].br_startoff); + out[cur_ext].bmv_length = + XFS_FSB_TO_BB(mp, map[i].br_blockcount); + out[cur_ext].bmv_unused1 = 0; + out[cur_ext].bmv_unused2 = 0; ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || (map[i].br_startblock != DELAYSTARTBLOCK)); if (map[i].br_startblock == HOLESTARTBLOCK && whichfork == XFS_ATTR_FORK) { /* came to the end of attribute fork */ - out.bmv_oflags |= BMV_OF_LAST; + out[cur_ext].bmv_oflags |= BMV_OF_LAST; goto out_free_map; } - if (!xfs_getbmapx_fix_eof_hole(ip, &out, prealloced, - bmvend, map[i].br_startblock)) + if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], + prealloced, bmvend, + map[i].br_startblock)) goto out_free_map; - /* format results & advance arg */ - error = formatter(&arg, &out, &full); - if (error || full) - goto out_free_map; nexleft--; bmv->bmv_offset = - out.bmv_offset + out.bmv_length; + out[cur_ext].bmv_offset + + out[cur_ext].bmv_length; bmv->bmv_length = max_t(__int64_t, 0, bmvend - bmv->bmv_offset); bmv->bmv_entries++; + cur_ext++; } } while (nmap && nexleft && bmv->bmv_length); @@ -6067,6 +6075,16 @@ xfs_getbmap( xfs_iunlock_map_shared(ip, lock); out_unlock_iolock: xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + for (i = 0; i < cur_ext; i++) { + int full = 0; /* user array is full */ + + /* format results & advance arg */ + error = formatter(&arg, &out[i], &full); + if (error || full) + break; + } + return error; } -- cgit v0.10.2 From 0c8b946e3ebb3846103486420ea7430a4b5e5b1b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 15 Apr 2009 17:48:18 +0200 Subject: vsprintf: introduce %pf format specifier A printf format specifier which would allow us to print a pure function name has been suggested by Andrew Morton a couple of months ago. The current %pF is very convenient to print a function symbol, but often we only want to print the name of the function, without its asm offset. That's what %pf does in this patch. The lowecase f has been chosen for its intuitive meaning of a 'weak kind of %pF'. The support for this new format would be welcome by the tracing code where the need to print pure function names is often needed. This is also true for other parts of the kernel: $ git-grep -E "kallsyms_lookup\(.+?\)" arch/blackfin/kernel/traps.c: symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); arch/powerpc/xmon/xmon.c: name = kallsyms_lookup(pc, &size, &offset, NULL, tmpstr); arch/sh/kernel/cpu/sh5/unwind.c: sym = kallsyms_lookup(pc, NULL, &offset, NULL, namebuf); arch/x86/kernel/ftrace.c: kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); kernel/kprobes.c: sym = kallsyms_lookup((unsigned long)p->addr, NULL, kernel/lockdep.c: return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); kernel/trace/ftrace.c: kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); kernel/trace/ftrace.c: kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); kernel/trace/ftrace.c: kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str); kernel/trace/ftrace.c: kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); kernel/trace/ftrace.c: kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); kernel/trace/ftrace.c: kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); kernel/trace/ftrace.c: kallsyms_lookup(*ptr, NULL, NULL, NULL, str); kernel/trace/trace_functions.c: kallsyms_lookup(ip, NULL, NULL, NULL, str); kernel/trace/trace_output.c: kallsyms_lookup(address, NULL, NULL, NULL, str); Changes in v2: - Add the explanation of the %pf role for vsnprintf() and bstr_printf() - Change the comments by dropping the "asm offset" notion and only define the %pf against the actual function offset notion. Signed-off-by: Frederic Weisbecker Acked-by: Mike Frysinger Cc: Linus Torvalds Cc: Zhaolei Cc: Tom Zanussi Cc: Li Zefan Cc: Andrew Morton Cc: Steven Rostedt LKML-Reference: <20090415154817.GC5989@nowhere> Signed-off-by: Ingo Molnar diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b56f6d0..756ccaf 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -575,12 +575,15 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec) } static char *symbol_string(char *buf, char *end, void *ptr, - struct printf_spec spec) + struct printf_spec spec, char ext) { unsigned long value = (unsigned long) ptr; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; - sprint_symbol(sym, value); + if (ext != 'f') + sprint_symbol(sym, value); + else + kallsyms_lookup(value, NULL, NULL, NULL, sym); return string(buf, end, sym, spec); #else spec.field_width = 2*sizeof(void *); @@ -692,7 +695,8 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr, * * Right now we handle: * - * - 'F' For symbolic function descriptor pointers + * - 'F' For symbolic function descriptor pointers with offset + * - 'f' For simple symbolic function names without offset * - 'S' For symbolic direct pointers * - 'R' For a struct resource pointer, it prints the range of * addresses (not the name nor the flags) @@ -715,10 +719,11 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, switch (*fmt) { case 'F': + case 'f': ptr = dereference_function_descriptor(ptr); /* Fallthrough */ case 'S': - return symbol_string(buf, end, ptr, spec); + return symbol_string(buf, end, ptr, spec, *fmt); case 'R': return resource_string(buf, end, ptr, spec); case 'm': @@ -954,7 +959,8 @@ qualifier: * * This function follows C99 vsnprintf, but has some extensions: * %pS output the name of a text symbol - * %pF output the name of a function pointer + * %pF output the name of a function pointer with its offset + * %pf output the name of a function pointer without its offset * %pR output the address range in a struct resource * * The return value is the number of characters which would @@ -1412,7 +1418,8 @@ EXPORT_SYMBOL_GPL(vbin_printf); * * The format follows C99 vsnprintf, but has some extensions: * %pS output the name of a text symbol - * %pF output the name of a function pointer + * %pF output the name of a function pointer with its offset + * %pf output the name of a function pointer without its offset * %pR output the address range in a struct resource * %n is ignored * -- cgit v0.10.2 From 43f6201a22dbf1c5abe1cab96b49bd56fa9df8f4 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 29 Apr 2009 16:55:56 +0200 Subject: perf_counter, x86: rename bitmasks to ->used_mask and ->active_mask Standardize on explicitly mentioning '_mask' in fields that are not plain flags but masks. This avoids typos like: if (cpuc->used) (which could easily slip through review unnoticed), while if a typo looks like this: if (cpuc->used_mask) it might get noticed during review. [ Impact: cleanup ] Signed-off-by: Robert Richter Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1241016956-24648-1-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 47e563b..fc06f4d 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -28,8 +28,8 @@ static u64 perf_counter_mask __read_mostly; struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; - unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long active[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; u64 throttle_ctrl; int enabled; @@ -332,7 +332,7 @@ static u64 amd_pmu_save_disable_all(void) for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; - if (!test_bit(idx, cpuc->active)) + if (!test_bit(idx, cpuc->active_mask)) continue; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) @@ -373,7 +373,7 @@ static void amd_pmu_restore_all(u64 ctrl) for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; - if (!test_bit(idx, cpuc->active)) + if (!test_bit(idx, cpuc->active_mask)) continue; rdmsrl(MSR_K7_EVNTSEL0 + idx, val); if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) @@ -576,7 +576,7 @@ static int x86_pmu_enable(struct perf_counter *counter) * Try to get the fixed counter, if that is already taken * then try to get a generic counter: */ - if (test_and_set_bit(idx, cpuc->used)) + if (test_and_set_bit(idx, cpuc->used_mask)) goto try_generic; hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; @@ -590,14 +590,14 @@ static int x86_pmu_enable(struct perf_counter *counter) } else { idx = hwc->idx; /* Try to get the previous generic counter again */ - if (test_and_set_bit(idx, cpuc->used)) { + if (test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = find_first_zero_bit(cpuc->used, + idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_counters); if (idx == x86_pmu.num_counters) return -EAGAIN; - set_bit(idx, cpuc->used); + set_bit(idx, cpuc->used_mask); hwc->idx = idx; } hwc->config_base = x86_pmu.eventsel; @@ -609,7 +609,7 @@ try_generic: x86_pmu.disable(hwc, idx); cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active); + set_bit(idx, cpuc->active_mask); x86_perf_counter_set_period(counter, hwc, idx); x86_pmu.enable(hwc, idx); @@ -643,7 +643,7 @@ void perf_counter_print_debug(void) pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); + pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); for (idx = 0; idx < x86_pmu.num_counters; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); @@ -677,7 +677,7 @@ static void x86_pmu_disable(struct perf_counter *counter) * Must be done before we disable, otherwise the nmi handler * could reenable again: */ - clear_bit(idx, cpuc->active); + clear_bit(idx, cpuc->active_mask); x86_pmu.disable(hwc, idx); /* @@ -692,7 +692,7 @@ static void x86_pmu_disable(struct perf_counter *counter) */ x86_perf_counter_update(counter, hwc, idx); cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used); + clear_bit(idx, cpuc->used_mask); } /* @@ -741,7 +741,7 @@ again: struct perf_counter *counter = cpuc->counters[bit]; clear_bit(bit, (unsigned long *) &status); - if (!test_bit(bit, cpuc->active)) + if (!test_bit(bit, cpuc->active_mask)) continue; intel_pmu_save_and_restart(counter); @@ -779,7 +779,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi) ++cpuc->interrupts; for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active)) + if (!test_bit(idx, cpuc->active_mask)) continue; counter = cpuc->counters[idx]; hwc = &counter->hw; -- cgit v0.10.2 From 23b94b967f118bef941369238f33c8140be46539 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Wed, 29 Apr 2009 21:54:51 +0100 Subject: locking, rtmutex.c: Documentation cleanup Two minor updates on functions documentation: - Updated documentation for function rt_mutex_unlock(), which contained an incorrect name - Removed extra '*' from comment in function rt_mutex_destroy() [ Impact: cleanup ] Signed-off-by: Luis Henriques Cc: Steven Rostedt LKML-Reference: <20090429205451.GA23154@hades.domain.com> Signed-off-by: Ingo Molnar diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 69d9cb9..013882e 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -864,9 +864,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); /** - * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible - * the timeout structure is provided - * by the caller + * rt_mutex_timed_lock - lock a rt_mutex interruptible + * the timeout structure is provided + * by the caller * * @lock: the rt_mutex to be locked * @timeout: timeout structure or NULL (no timeout) @@ -913,7 +913,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) } EXPORT_SYMBOL_GPL(rt_mutex_unlock); -/*** +/** * rt_mutex_destroy - mark a mutex unusable * @lock: the mutex to be destroyed * -- cgit v0.10.2 From 88c48db9788862d0290831d081bc3c64e13b592f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 29 Apr 2009 14:00:25 -0400 Subject: SELinux: drop secondary_ops->sysctl We are still calling secondary_ops->sysctl even though the capabilities module does not define a sysctl operation. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Signed-off-by: James Morris diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ba808ef..dd19ba8 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1980,10 +1980,6 @@ static int selinux_sysctl(ctl_table *table, int op) u32 tsid, sid; int rc; - rc = secondary_ops->sysctl(table, op); - if (rc) - return rc; - sid = current_sid(); rc = selinux_sysctl_get_sid(table, (op == 0001) ? -- cgit v0.10.2 From 3bcac0263f0b45e67a64034ebcb69eb9abb742f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 29 Apr 2009 13:45:05 +0100 Subject: SELinux: Don't flush inherited SIGKILL during execve() Don't flush inherited SIGKILL during execve() in SELinux's post cred commit hook. This isn't really a security problem: if the SIGKILL came before the credentials were changed, then we were right to receive it at the time, and should honour it; if it came after the creds were changed, then we definitely should honour it; and in any case, all that will happen is that the process will be scrapped before it ever returns to userspace. Signed-off-by: David Howells Signed-off-by: Oleg Nesterov Signed-off-by: James Morris diff --git a/include/linux/sched.h b/include/linux/sched.h index 1d19c02..d3b787c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1875,6 +1875,7 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); +extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); diff --git a/kernel/signal.c b/kernel/signal.c index 1c88144..f93efec 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -238,14 +238,19 @@ void flush_sigqueue(struct sigpending *queue) /* * Flush all pending signals for a task. */ +void __flush_signals(struct task_struct *t) +{ + clear_tsk_thread_flag(t, TIF_SIGPENDING); + flush_sigqueue(&t->pending); + flush_sigqueue(&t->signal->shared_pending); +} + void flush_signals(struct task_struct *t) { unsigned long flags; spin_lock_irqsave(&t->sighand->siglock, flags); - clear_tsk_thread_flag(t, TIF_SIGPENDING); - flush_sigqueue(&t->pending); - flush_sigqueue(&t->signal->shared_pending); + __flush_signals(t); spin_unlock_irqrestore(&t->sighand->siglock, flags); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index dd19ba8..5a34511 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2394,11 +2394,12 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) memset(&itimer, 0, sizeof itimer); for (i = 0; i < 3; i++) do_setitimer(i, &itimer, NULL); - flush_signals(current); spin_lock_irq(¤t->sighand->siglock); - flush_signal_handlers(current, 1); - sigemptyset(¤t->blocked); - recalc_sigpending(); + if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { + __flush_signals(current); + flush_signal_handlers(current, 1); + sigemptyset(¤t->blocked); + } spin_unlock_irq(¤t->sighand->siglock); } -- cgit v0.10.2 From ecd6de3c88e8cbcad175b2eab48ba05c2014f7b6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 29 Apr 2009 16:02:24 +0200 Subject: selinux: selinux_bprm_committed_creds() should wake up ->real_parent, not ->parent. We shouldn't worry about the tracer if current is ptraced, exec() must not succeed if the tracer has no rights to trace this task after cred changing. But we should notify ->real_parent which is, well, real parent. Also, we don't need _irq to take tasklist, and we don't need parent's ->siglock to wake_up_interruptible(real_parent->signal->wait_chldexit). Since we hold tasklist, real_parent->signal must be stable. Otherwise spin_lock(siglock) is not safe too and can't help anyway. Signed-off-by: Oleg Nesterov Signed-off-by: James Morris diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5a34511..39046dd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2371,10 +2371,8 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) { const struct task_security_struct *tsec = current_security(); struct itimerval itimer; - struct sighand_struct *psig; u32 osid, sid; int rc, i; - unsigned long flags; osid = tsec->osid; sid = tsec->sid; @@ -2405,12 +2403,9 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) /* Wake up the parent if it is waiting so that it can recheck * wait permission to the new task SID. */ - read_lock_irq(&tasklist_lock); - psig = current->parent->sighand; - spin_lock_irqsave(&psig->siglock, flags); - wake_up_interruptible(¤t->parent->signal->wait_chldexit); - spin_unlock_irqrestore(&psig->siglock, flags); - read_unlock_irq(&tasklist_lock); + read_lock(&tasklist_lock); + wake_up_interruptible(¤t->real_parent->signal->wait_chldexit); + read_unlock(&tasklist_lock); } /* superblock security operations */ -- cgit v0.10.2 From e6be3a25861429166f945499c7ee616875bc3db9 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 30 Apr 2009 12:56:37 +0900 Subject: sh: pass through ioremap() for non-mmu processors. All 32-bit SuperH processors currently go through __ioremap_mode() and check for IO_TRAPPED and directly mapped segments. With this patch we simplify the MMU less case with a pass through version of __ioremap_mode() which just returns the physical address. The effects of this is change are: - fix non-MMU ioremap() of high address hardware blocks (sh7203 CMT) - make sure IO_TRAPPED is not selected Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index dcc1af8..99e2d47 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -121,7 +121,7 @@ config SH_RTS7751R2D bool "RTS7751R2D" depends on CPU_SUBTYPE_SH7751R select SYS_SUPPORTS_PCI - select IO_TRAPPED + select IO_TRAPPED if MMU help Select RTS7751R2D if configuring for a Renesas Technology Sales SH-Graphics board. @@ -145,13 +145,13 @@ config SH_HIGHLANDER bool "Highlander" depends on CPU_SUBTYPE_SH7780 || CPU_SUBTYPE_SH7785 select SYS_SUPPORTS_PCI - select IO_TRAPPED + select IO_TRAPPED if MMU config SH_SH7785LCR bool "SH7785LCR" depends on CPU_SUBTYPE_SH7785 select SYS_SUPPORTS_PCI - select IO_TRAPPED + select IO_TRAPPED if MMU config SH_SH7785LCR_29BIT_PHYSMAPS bool "SH7785LCR 29bit physmaps" diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 0454f8d..ef21734 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -228,12 +228,6 @@ void __iounmap(void __iomem *addr); unsigned long onchip_remap(unsigned long addr, unsigned long size, const char *name); extern void onchip_unmap(unsigned long vaddr); -#else -#define __ioremap(offset, size, flags) ((void __iomem *)(offset)) -#define __iounmap(addr) do { } while (0) -#define onchip_remap(addr, size, name) (addr) -#define onchip_unmap(addr) do { } while (0) -#endif /* CONFIG_MMU */ static inline void __iomem * __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags) @@ -268,6 +262,12 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags) return __ioremap(offset, size, flags); } +#else +#define onchip_remap(addr, size, name) (addr) +#define onchip_unmap(addr) do { } while (0) +#define __ioremap_mode(offset, size, flags) ((void __iomem *)(offset)) +#define __iounmap(addr) do { } while (0) +#endif /* CONFIG_MMU */ #define ioremap(offset, size) \ __ioremap_mode((offset), (size), 0) -- cgit v0.10.2 From 3014f47460ecfb13d4169daae51f26a20bacfa17 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 29 Apr 2009 14:50:37 +0000 Subject: clocksource: sh_cmt 16-bit fixes This patch contains various fixes for 16-bit cmt hardware. With this applied periodic clockevents work fine on sh7203. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index bf3e4c1..4ff1508 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -158,16 +158,18 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) pr_err("sh_cmt: cannot enable clock \"%s\"\n", cfg->clk); return ret; } - *rate = clk_get_rate(p->clk) / 8; /* make sure channel is disabled */ sh_cmt_start_stop_ch(p, 0); /* configure channel, periodic mode and maximum timeout */ - if (p->width == 16) - sh_cmt_write(p, CMCSR, 0); - else + if (p->width == 16) { + *rate = clk_get_rate(p->clk) / 512; + sh_cmt_write(p, CMCSR, 0x43); + } else { + *rate = clk_get_rate(p->clk) / 8; sh_cmt_write(p, CMCSR, 0x01a4); + } sh_cmt_write(p, CMCOR, 0xffffffff); sh_cmt_write(p, CMCNT, 0); @@ -615,7 +617,7 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev) if (resource_size(res) == 6) { p->width = 16; p->overflow_bit = 0x80; - p->clear_bits = ~0xc0; + p->clear_bits = ~0x80; } else { p->width = 32; p->overflow_bit = 0x8000; -- cgit v0.10.2 From 698aa99da5f5e2b4c666fd21ab77306f0225b8f5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 30 Apr 2009 04:08:18 +0000 Subject: sh: sh2/sh2a 16-bit CMT platform data This patch adds 16-bit cmt platform data for the following cpus: - sh7619 (2 channels) - sh7203/sh7263 (2 channels) - sh7206 (2 channels) Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c index 0e32d8e..d70c263 100644 --- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c +++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include enum { UNUSED = 0, @@ -109,9 +111,75 @@ static struct platform_device eth_device = { .resource = eth_resources, }; +static struct sh_cmt_config cmt0_platform_data = { + .name = "CMT0", + .channel_offset = 0x02, + .timer_bit = 0, + .clk = "module_clk", + .clockevent_rating = 125, + .clocksource_rating = 0, /* disabled due to code generation issues */ +}; + +static struct resource cmt0_resources[] = { + [0] = { + .name = "CMT0", + .start = 0xf84a0072, + .end = 0xf84a0077, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 86, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device cmt0_device = { + .name = "sh_cmt", + .id = 0, + .dev = { + .platform_data = &cmt0_platform_data, + }, + .resource = cmt0_resources, + .num_resources = ARRAY_SIZE(cmt0_resources), +}; + +static struct sh_cmt_config cmt1_platform_data = { + .name = "CMT1", + .channel_offset = 0x08, + .timer_bit = 1, + .clk = "module_clk", + .clockevent_rating = 125, + .clocksource_rating = 0, /* disabled due to code generation issues */ +}; + +static struct resource cmt1_resources[] = { + [0] = { + .name = "CMT1", + .start = 0xf84a0078, + .end = 0xf84a007d, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 87, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device cmt1_device = { + .name = "sh_cmt", + .id = 1, + .dev = { + .platform_data = &cmt1_platform_data, + }, + .resource = cmt1_resources, + .num_resources = ARRAY_SIZE(cmt1_resources), +}; + static struct platform_device *sh7619_devices[] __initdata = { &sci_device, ð_device, + &cmt0_device, + &cmt1_device, }; static int __init sh7619_devices_setup(void) @@ -125,3 +193,19 @@ void __init plat_irq_setup(void) { register_intc_controller(&intc_desc); } + +static struct platform_device *sh7619_early_devices[] __initdata = { + &cmt0_device, + &cmt1_device, +}; + +#define STBCR3 0xf80a0000 + +void __init plat_early_device_setup(void) +{ + /* enable CMT clock */ + __raw_writeb(__raw_readb(STBCR3) & ~0x10, STBCR3); + + early_platform_add_devices(sh7619_early_devices, + ARRAY_SIZE(sh7619_early_devices)); +} diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c index 820dfb2..0836ace 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include enum { UNUSED = 0, @@ -205,6 +207,70 @@ static struct platform_device sci_device = { }, }; +static struct sh_cmt_config cmt0_platform_data = { + .name = "CMT0", + .channel_offset = 0x02, + .timer_bit = 0, + .clk = "module_clk", + .clockevent_rating = 125, + .clocksource_rating = 0, /* disabled due to code generation issues */ +}; + +static struct resource cmt0_resources[] = { + [0] = { + .name = "CMT0", + .start = 0xfffec002, + .end = 0xfffec007, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 142, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device cmt0_device = { + .name = "sh_cmt", + .id = 0, + .dev = { + .platform_data = &cmt0_platform_data, + }, + .resource = cmt0_resources, + .num_resources = ARRAY_SIZE(cmt0_resources), +}; + +static struct sh_cmt_config cmt1_platform_data = { + .name = "CMT1", + .channel_offset = 0x08, + .timer_bit = 1, + .clk = "module_clk", + .clockevent_rating = 125, + .clocksource_rating = 0, /* disabled due to code generation issues */ +}; + +static struct resource cmt1_resources[] = { + [0] = { + .name = "CMT1", + .start = 0xfffec008, + .end = 0xfffec00d, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 143, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device cmt1_device = { + .name = "sh_cmt", + .id = 1, + .dev = { + .platform_data = &cmt1_platform_data, + }, + .resource = cmt1_resources, + .num_resources = ARRAY_SIZE(cmt1_resources), +}; + static struct resource rtc_resources[] = { [0] = { .start = 0xffff2000, @@ -227,6 +293,8 @@ static struct platform_device rtc_device = { static struct platform_device *sh7203_devices[] __initdata = { &sci_device, + &cmt0_device, + &cmt1_device, &rtc_device, }; @@ -241,3 +309,19 @@ void __init plat_irq_setup(void) { register_intc_controller(&intc_desc); } + +static struct platform_device *sh7203_early_devices[] __initdata = { + &cmt0_device, + &cmt1_device, +}; + +#define STBCR4 0xfffe040c + +void __init plat_early_device_setup(void) +{ + /* enable CMT clock */ + __raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4); + + early_platform_add_devices(sh7203_early_devices, + ARRAY_SIZE(sh7203_early_devices)); +} diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c index c46a835..f9606e3 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include enum { UNUSED = 0, @@ -165,8 +167,74 @@ static struct platform_device sci_device = { }, }; +static struct sh_cmt_config cmt0_platform_data = { + .name = "CMT0", + .channel_offset = 0x02, + .timer_bit = 0, + .clk = "module_clk", + .clockevent_rating = 125, + .clocksource_rating = 0, /* disabled due to code generation issues */ +}; + +static struct resource cmt0_resources[] = { + [0] = { + .name = "CMT0", + .start = 0xfffec002, + .end = 0xfffec007, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 140, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device cmt0_device = { + .name = "sh_cmt", + .id = 0, + .dev = { + .platform_data = &cmt0_platform_data, + }, + .resource = cmt0_resources, + .num_resources = ARRAY_SIZE(cmt0_resources), +}; + +static struct sh_cmt_config cmt1_platform_data = { + .name = "CMT1", + .channel_offset = 0x08, + .timer_bit = 1, + .clk = "module_clk", + .clockevent_rating = 125, + .clocksource_rating = 0, /* disabled due to code generation issues */ +}; + +static struct resource cmt1_resources[] = { + [0] = { + .name = "CMT1", + .start = 0xfffec008, + .end = 0xfffec00d, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 144, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device cmt1_device = { + .name = "sh_cmt", + .id = 1, + .dev = { + .platform_data = &cmt1_platform_data, + }, + .resource = cmt1_resources, + .num_resources = ARRAY_SIZE(cmt1_resources), +}; + static struct platform_device *sh7206_devices[] __initdata = { &sci_device, + &cmt0_device, + &cmt1_device, }; static int __init sh7206_devices_setup(void) @@ -180,3 +248,19 @@ void __init plat_irq_setup(void) { register_intc_controller(&intc_desc); } + +static struct platform_device *sh7206_early_devices[] __initdata = { + &cmt0_device, + &cmt1_device, +}; + +#define STBCR4 0xfffe040c + +void __init plat_early_device_setup(void) +{ + /* enable CMT clock */ + __raw_writeb(__raw_readb(STBCR4) & ~0x04, STBCR4); + + early_platform_add_devices(sh7206_early_devices, + ARRAY_SIZE(sh7206_early_devices)); +} -- cgit v0.10.2 From f425752fc66acf1d4e47970ea704ed7d31c14173 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 30 Apr 2009 04:09:26 +0000 Subject: sh: remove old CMT driver This patch removes the old CMT driver (CONFIG_SH_CMT/timer-cmt.c) As replacement, select the sh_cmt driver with CONFIG_SH_TIMER_CMT and configure timer channel using platform data. If multiple CMT channels are enabled using platform data, use the earlytimer parameter on the kernel command line to select channel. For instance, use "earlytimer=sh_cmt.0" to select the first channel. To verify which timer is being used, look at printouts or the timer irq count in /proc/interrupts. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 9db02ce..7e96ade 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -462,22 +462,14 @@ config SH_TMU help This enables the use of the TMU as the system timer. -config SH_CMT - bool "CMT timer support" - depends on SYS_SUPPORTS_CMT && CPU_SH2 - default y - help - This enables the use of the CMT as the system timer. - -# -# Support for the new-style CMT driver. This will replace SH_CMT -# once its other dependencies are merged. -# config SH_TIMER_CMT - bool "CMT clockevents driver" - depends on SYS_SUPPORTS_CMT && !SH_CMT + bool "CMT timer driver" + depends on SYS_SUPPORTS_CMT + default y select GENERIC_CLOCKEVENTS select GENERIC_TIME + help + This enables build of the CMT timer driver. config SH_MTU2 bool "MTU2 timer support" diff --git a/arch/sh/include/asm/timer.h b/arch/sh/include/asm/timer.h index 4c3b66e3..8f80a55 100644 --- a/arch/sh/include/asm/timer.h +++ b/arch/sh/include/asm/timer.h @@ -23,7 +23,7 @@ struct sys_timer { #define TICK_SIZE (tick_nsec / 1000) -extern struct sys_timer tmu_timer, cmt_timer, mtu2_timer; +extern struct sys_timer tmu_timer, mtu2_timer; extern struct sys_timer *sys_timer; #ifndef CONFIG_GENERIC_TIME diff --git a/arch/sh/kernel/timers/Makefile b/arch/sh/kernel/timers/Makefile index 0b7f857..1e9a104 100644 --- a/arch/sh/kernel/timers/Makefile +++ b/arch/sh/kernel/timers/Makefile @@ -6,6 +6,5 @@ obj-y := timer.o obj-$(CONFIG_SH_TMU) += timer-tmu.o obj-$(CONFIG_SH_MTU2) += timer-mtu2.o -obj-$(CONFIG_SH_CMT) += timer-cmt.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += timer-broadcast.o diff --git a/arch/sh/kernel/timers/timer-cmt.c b/arch/sh/kernel/timers/timer-cmt.c deleted file mode 100644 index 9aa3486..0000000 --- a/arch/sh/kernel/timers/timer-cmt.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * arch/sh/kernel/timers/timer-cmt.c - CMT Timer Support - * - * Copyright (C) 2005 Yoshinori Sato - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(CONFIG_CPU_SUBTYPE_SH7619) -#define CMT_CMSTR 0xf84a0070 -#define CMT_CMCSR_0 0xf84a0072 -#define CMT_CMCNT_0 0xf84a0074 -#define CMT_CMCOR_0 0xf84a0076 -#define CMT_CMCSR_1 0xf84a0078 -#define CMT_CMCNT_1 0xf84a007a -#define CMT_CMCOR_1 0xf84a007c - -#define STBCR3 0xf80a0000 -#define cmt_clock_enable() do { ctrl_outb(ctrl_inb(STBCR3) & ~0x10, STBCR3); } while(0) -#define CMT_CMCSR_INIT 0x0040 -#define CMT_CMCSR_CALIB 0x0000 -#elif defined(CONFIG_CPU_SUBTYPE_SH7203) || \ - defined(CONFIG_CPU_SUBTYPE_SH7206) || \ - defined(CONFIG_CPU_SUBTYPE_SH7263) -#define CMT_CMSTR 0xfffec000 -#define CMT_CMCSR_0 0xfffec002 -#define CMT_CMCNT_0 0xfffec004 -#define CMT_CMCOR_0 0xfffec006 - -#define STBCR4 0xfffe040c -#define cmt_clock_enable() do { ctrl_outb(ctrl_inb(STBCR4) & ~0x04, STBCR4); } while(0) -#define CMT_CMCSR_INIT 0x0040 -#define CMT_CMCSR_CALIB 0x0000 -#else -#error "Unknown CPU SUBTYPE" -#endif - -static unsigned long cmt_timer_get_offset(void) -{ - int count; - static unsigned short count_p = 0xffff; /* for the first call after boot */ - static unsigned long jiffies_p = 0; - - /* - * cache volatile jiffies temporarily; we have IRQs turned off. - */ - unsigned long jiffies_t; - - /* timer count may underflow right here */ - count = ctrl_inw(CMT_CMCOR_0); - count -= ctrl_inw(CMT_CMCNT_0); - - jiffies_t = jiffies; - - /* - * avoiding timer inconsistencies (they are rare, but they happen)... - * there is one kind of problem that must be avoided here: - * 1. the timer counter underflows - */ - - if (jiffies_t == jiffies_p) { - if (count > count_p) { - /* the nutcase */ - if (ctrl_inw(CMT_CMCSR_0) & 0x80) { /* Check CMF bit */ - count -= LATCH; - } else { - printk("%s (): hardware timer problem?\n", - __func__); - } - } - } else - jiffies_p = jiffies_t; - - count_p = count; - - count = ((LATCH-1) - count) * TICK_SIZE; - count = (count + LATCH/2) / LATCH; - - return count; -} - -static irqreturn_t cmt_timer_interrupt(int irq, void *dev_id) -{ - unsigned long timer_status; - - /* Clear CMF bit */ - timer_status = ctrl_inw(CMT_CMCSR_0); - timer_status &= ~0x80; - ctrl_outw(timer_status, CMT_CMCSR_0); - - handle_timer_tick(); - - return IRQ_HANDLED; -} - -static struct irqaction cmt_irq = { - .name = "timer", - .handler = cmt_timer_interrupt, - .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, -}; - -static void cmt_clk_init(struct clk *clk) -{ - u8 divisor = CMT_CMCSR_INIT & 0x3; - ctrl_inw(CMT_CMCSR_0); - ctrl_outw(CMT_CMCSR_INIT, CMT_CMCSR_0); - clk->parent = clk_get(NULL, "module_clk"); - clk->rate = clk->parent->rate / (8 << (divisor << 1)); -} - -static void cmt_clk_recalc(struct clk *clk) -{ - u8 divisor = ctrl_inw(CMT_CMCSR_0) & 0x3; - clk->rate = clk->parent->rate / (8 << (divisor << 1)); -} - -static struct clk_ops cmt_clk_ops = { - .init = cmt_clk_init, - .recalc = cmt_clk_recalc, -}; - -static struct clk cmt0_clk = { - .name = "cmt0_clk", - .ops = &cmt_clk_ops, -}; - -static int cmt_timer_start(void) -{ - ctrl_outw(ctrl_inw(CMT_CMSTR) | 0x01, CMT_CMSTR); - return 0; -} - -static int cmt_timer_stop(void) -{ - ctrl_outw(ctrl_inw(CMT_CMSTR) & ~0x01, CMT_CMSTR); - return 0; -} - -static int cmt_timer_init(void) -{ - unsigned long interval; - - cmt_clock_enable(); - - setup_irq(CONFIG_SH_TIMER_IRQ, &cmt_irq); - - cmt0_clk.parent = clk_get(NULL, "module_clk"); - - cmt_timer_stop(); - - interval = cmt0_clk.parent->rate / 8 / HZ; - printk(KERN_INFO "Interval = %ld\n", interval); - - ctrl_outw(interval, CMT_CMCOR_0); - - clk_register(&cmt0_clk); - clk_enable(&cmt0_clk); - - cmt_timer_start(); - - return 0; -} - -static struct sys_timer_ops cmt_timer_ops = { - .init = cmt_timer_init, - .start = cmt_timer_start, - .stop = cmt_timer_stop, -#ifndef CONFIG_GENERIC_TIME - .get_offset = cmt_timer_get_offset, -#endif -}; - -struct sys_timer cmt_timer = { - .name = "cmt", - .ops = &cmt_timer_ops, -}; diff --git a/arch/sh/kernel/timers/timer.c b/arch/sh/kernel/timers/timer.c index 4e7e747..f3bd141 100644 --- a/arch/sh/kernel/timers/timer.c +++ b/arch/sh/kernel/timers/timer.c @@ -20,9 +20,6 @@ static struct sys_timer *sys_timers[] = { #ifdef CONFIG_SH_MTU2 &mtu2_timer, #endif -#ifdef CONFIG_SH_CMT - &cmt_timer, -#endif NULL, }; -- cgit v0.10.2 From c5dd016cdf0a040e1de0b691e274fbfe642b2cdc Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 30 Apr 2009 09:48:16 +1000 Subject: perf_counter: update copyright notice This adds my name to the list of copyright holders on the core perf_counter.c, since I have contributed a significant amount of the code in there. Signed-off-by: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter LKML-Reference: <18936.59200.888049.746658@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index a95a171..75f2b6c 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -4,6 +4,7 @@ * Copyright (C) 2008 Thomas Gleixner * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra + * Copyright © 2009 Paul Mackerras, IBM Corp. * * For licensing details see kernel-base/COPYING */ -- cgit v0.10.2 From a511e3f968c462a55ef58697257f5347c73d306e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 29 Apr 2009 15:59:58 -0700 Subject: mutex: add atomic_dec_and_mutex_lock(), fix include/linux/mutex.h:136: warning: 'mutex_lock' declared inline after being called include/linux/mutex.h:136: warning: previous declaration of 'mutex_lock' was here uninline it. [ Impact: clean up and uninline, address compiler warning ] Signed-off-by: Andrew Morton Cc: Al Viro Cc: Christoph Hellwig Cc: Eric Paris Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <200904292318.n3TNIsi6028340@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 93054fc..878cab4 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -150,28 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); - -/** - * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 - * @cnt: the atomic which we are to dec - * @lock: the mutex to return holding if we dec to 0 - * - * return true and hold lock if we dec to 0, return false otherwise - */ -static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) -{ - /* dec if we can't possibly hit 0 */ - if (atomic_add_unless(cnt, -1, 1)) - return 0; - /* we might hit 0, so take the lock */ - mutex_lock(lock); - if (!atomic_dec_and_test(cnt)) { - /* when we actually did the dec, we didn't hit 0 */ - mutex_unlock(lock); - return 0; - } - /* we hit 0, and we hold the lock */ - return 1; -} +extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); #endif diff --git a/kernel/mutex.c b/kernel/mutex.c index 507cf2b..e2d25e9 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -471,5 +471,28 @@ int __sched mutex_trylock(struct mutex *lock) return ret; } - EXPORT_SYMBOL(mutex_trylock); + +/** + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 + * @cnt: the atomic which we are to dec + * @lock: the mutex to return holding if we dec to 0 + * + * return true and hold lock if we dec to 0, return false otherwise + */ +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) +{ + /* dec if we can't possibly hit 0 */ + if (atomic_add_unless(cnt, -1, 1)) + return 0; + /* we might hit 0, so take the lock */ + mutex_lock(lock); + if (!atomic_dec_and_test(cnt)) { + /* when we actually did the dec, we didn't hit 0 */ + mutex_unlock(lock); + return 0; + } + /* we hit 0, and we hold the lock */ + return 1; +} +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); -- cgit v0.10.2 From 2b72394e4089643f11669d9610907a1442fe044a Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 28 Apr 2009 16:00:49 +0300 Subject: x86: move max_pfn_mapped and max_low_pfn_mapped to setup.c This patch moves the max_pfn_mapped and max_low_pfn_mapped global variables to kernel/setup.c where they're initialized. [ Impact: cleanup ] Signed-off-by: Pekka Enberg LKML-Reference: <1240923649.1982.21.camel@penberg-laptop> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b415843..0d77e56 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,14 @@ #define ARCH_SETUP #endif +/* + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. + * The direct mapping extends to max_pfn_mapped, so that we can directly access + * apertures, ACPI and other tables without having to play with fixmaps. + */ +unsigned long max_low_pfn_mapped; +unsigned long max_pfn_mapped; + RESERVE_BRK(dmi_alloc, 65536); unsigned int boot_cpu_id __read_mostly; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2b27120..a640a7f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -49,11 +49,9 @@ #include #include #include +#include #include -unsigned long max_low_pfn_mapped; -unsigned long max_pfn_mapped; - DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a4e7846..1016ea0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -50,14 +50,6 @@ #include #include -/* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. - */ -unsigned long max_low_pfn_mapped; -unsigned long max_pfn_mapped; - static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -- cgit v0.10.2 From 9518e0e4350a5ea8ca200ce320b28d6284a7b0ce Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Tue, 28 Apr 2009 16:00:50 +0300 Subject: x86: move per-cpu mmu_gathers to mm/init.c [ Impact: cleanup ] Signed-off-by: Pekka Enberg LKML-Reference: <1240923650.1982.22.camel@penberg-laptop> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fedde53..4d67c33 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -9,6 +9,9 @@ #include #include #include +#include + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long __initdata e820_table_start; unsigned long __meminitdata e820_table_end; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index a640a7f..fef1d90 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -52,7 +52,6 @@ #include #include -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1016ea0..6a1a573 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -52,8 +52,6 @@ static unsigned long dma_reserve __initdata; -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); - static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; -- cgit v0.10.2 From ba9c22f2c01cf5c88beed5a6b9e07d42e10bd358 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Mon, 20 Apr 2009 22:22:22 -0700 Subject: futex: remove FUTEX_REQUEUE_PI (non CMP) The new requeue PI futex op codes were modeled after the existing FUTEX_REQUEUE and FUTEX_CMP_REQUEUE calls. I was unaware at the time that FUTEX_REQUEUE was only around for compatibility reasons and shouldn't be used in new code. Ulrich Drepper elaborates on this in his Futexes are Tricky paper: http://people.redhat.com/drepper/futex.pdf. The deprecated call doesn't catch changes to the futex corresponding to the destination futex which can lead to deadlock. Therefor, I feel it best to remove FUTEX_REQUEUE_PI and leave only FUTEX_CMP_REQUEUE_PI as there are not yet any existing users of the API. This patch does change the OP code value of FUTEX_CMP_REQUEUE_PI to 12 from 13. Since my test case is the only known user of this API, I felt this was the right thing to do, rather than leave a hole in the enumeration. I chose to continue using the _CMP_ modifier in the OP code to make it explicit to the user that the test is being done. Builds, boots, and ran several hundred iterations requeue_pi.c. Signed-off-by: Darren Hart LKML-Reference: <49ED580E.1050502@us.ibm.com> Signed-off-by: Thomas Gleixner diff --git a/include/linux/futex.h b/include/linux/futex.h index b05519c..34956c8 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -24,8 +24,7 @@ union ktime; #define FUTEX_WAIT_BITSET 9 #define FUTEX_WAKE_BITSET 10 #define FUTEX_WAIT_REQUEUE_PI 11 -#define FUTEX_REQUEUE_PI 12 -#define FUTEX_CMP_REQUEUE_PI 13 +#define FUTEX_CMP_REQUEUE_PI 12 #define FUTEX_PRIVATE_FLAG 128 #define FUTEX_CLOCK_REALTIME 256 @@ -43,7 +42,6 @@ union ktime; #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) #define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) -#define FUTEX_REQUEUE_PI_PRIVATE (FUTEX_REQUEUE_PI | FUTEX_PRIVATE_FLAG) #define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ FUTEX_PRIVATE_FLAG) diff --git a/kernel/futex.c b/kernel/futex.c index 6d2daa4..aec8bf8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2555,9 +2555,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, clockrt, uaddr2); break; - case FUTEX_REQUEUE_PI: - ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 1); - break; case FUTEX_CMP_REQUEUE_PI: ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 1); @@ -2596,8 +2593,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. */ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_REQUEUE_PI || cmd == FUTEX_CMP_REQUEUE_PI || - cmd == FUTEX_WAKE_OP) + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) val2 = (u32) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); -- cgit v0.10.2 From 83c4832683bc8ebcd1687b3c0bf3ba1ab253dd4f Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Thu, 30 Apr 2009 12:03:16 +0200 Subject: x86: boot/compressed/vmlinux.lds.S: fix build of bzImage with 64 bit compiler Jesper reported that he saw following build issue: > ld:arch/x86/boot/compressed/vmlinux.lds:9: syntax error > make[2]: *** [arch/x86/boot/compressed/vmlinux] Error 1 > make[1]: *** [arch/x86/boot/compressed/vmlinux] Error 2 > make: *** [bzImage] Error 2 CPP defines the symbol "i386" to "1". Undefine this to fix it. [ Impact: build fix with certain tool chains ] Reported-by: Jesper Dangaard Brouer Signed-off-by: Sam Ravnborg Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index ffcb1913..0d26c92 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -1,5 +1,7 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) +#undef i386 + #ifdef CONFIG_X86_64 OUTPUT_ARCH(i386:x86-64) ENTRY(startup_64) -- cgit v0.10.2 From aac3f3c2c41ce49a6dbb98d9145265c00a964dc2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2009 13:52:19 +0200 Subject: perf_counter tools: add perf-report to the Makefile Build it explicitly until it's a proper builtin command. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/Makefile b/Documentation/perf_counter/Makefile index 543ccf2..877cf5d 100644 --- a/Documentation/perf_counter/Makefile +++ b/Documentation/perf_counter/Makefile @@ -228,7 +228,7 @@ COMPAT_CFLAGS = COMPAT_OBJS = LIB_H = LIB_OBJS = -PROGRAMS = +PROGRAMS = perf-report SCRIPT_PERL = SCRIPT_SH = TEST_PROGRAMS = @@ -808,6 +808,10 @@ clean: $(RM) $(htmldocs).tar.gz $(manpages).tar.gz $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS +# temporary hack: +perf-report: perf-report.cc ../../include/linux/perf_counter.h Makefile + g++ -g -O2 -Wall -lrt -o $@ $< + .PHONY: all install clean strip .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS -- cgit v0.10.2 From 66cf782996f3d57d3cc199f0a2d47a54e2aa5991 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2009 13:53:33 +0200 Subject: perf_counter tools: perf stat: make -l default-on Turn on scaling display by default - this is less confusing. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/builtin-stat.c b/Documentation/perf_counter/builtin-stat.c index 112b94e..1fde127 100644 --- a/Documentation/perf_counter/builtin-stat.c +++ b/Documentation/perf_counter/builtin-stat.c @@ -171,7 +171,7 @@ static unsigned int page_size; static int zero; -static int scale; +static int scale = 1; static const unsigned int default_count[] = { 1000000, -- cgit v0.10.2 From bad760089c1ef7fe525c0f268a4078b9cb483903 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2009 14:14:37 +0200 Subject: perf_counter tools: fix infinite loop in perf-report on zeroed event records Bail out early if a record has zero size - we have no chance to make reliable progress in that case. Print out the offset where this happens, and print the number of bytes we missed out on. Signed-off-by: Ingo Molnar diff --git a/Documentation/perf_counter/perf-report.cc b/Documentation/perf_counter/perf-report.cc index 1727317..933a075 100644 --- a/Documentation/perf_counter/perf-report.cc +++ b/Documentation/perf_counter/perf-report.cc @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -226,7 +227,7 @@ void load_kallsyms(void) while (!feof(file)) { uint64_t start; char c; - char sym[1024]; + char sym[1024000]; if (getline(&line, &n, file) < 0) break; @@ -416,12 +417,23 @@ more: if (head + event->header.size >= page_size * mmap_window) { unsigned long shift = page_size * (head / page_size); + int ret; + + ret = munmap(buf, page_size * mmap_window); + assert(ret == 0); - munmap(buf, page_size * mmap_window); offset += shift; head -= shift; goto remap; } + + + if (!event->header.size) { + fprintf(stderr, "zero-sized event at file offset %ld\n", offset + head); + fprintf(stderr, "skipping %ld bytes of events.\n", stat.st_size - offset - head); + goto done; + } + head += event->header.size; if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { @@ -458,6 +470,8 @@ more: if (offset + head < stat.st_size) goto more; +done: + close(input); std::map::iterator hi = hist.begin(); -- cgit v0.10.2 From 09aa60df92a9c5ff00e156c0dbc79f166d406a7f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Apr 2009 18:51:48 +0100 Subject: ASoC: Fix error message formatting in s3c64xx-i2s driver Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c index 1345fbd..7679f7b 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.c +++ b/sound/soc/s3c24xx/s3c64xx-i2s.c @@ -215,7 +215,7 @@ static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev) i2s->iis_cclk = clk_get(&pdev->dev, "audio-bus"); if (IS_ERR(i2s->iis_cclk)) { - dev_err(&pdev->dev, "failed to get audio-bus"); + dev_err(&pdev->dev, "failed to get audio-bus\n"); ret = PTR_ERR(i2s->iis_cclk); goto err; } -- cgit v0.10.2 From 8a0f62b842e2f189e36d9f4c575ee15da9c605ff Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Apr 2009 20:28:47 +0100 Subject: ASoC: Check for supported CPUs when building s3c-i2s-v2 Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index ab680aa..3b9201c 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -37,6 +37,20 @@ #include "s3c-i2s-v2.h" +#undef S3C_IIS_V2_SUPPORTED + +#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413) +#define S3C_IIS_V2_SUPPORTED +#endif + +#ifdef CONFIG_PLAT_S3C64XX +#define S3C_IIS_V2_SUPPORTED +#endif + +#ifndef S3C_IIS_V2_SUPPORTED +#error Unsupported CPU model +#endif + #define S3C2412_I2S_DEBUG_CON 0 static inline struct s3c_i2sv2_info *to_info(struct snd_soc_dai *cpu_dai) -- cgit v0.10.2 From 51438449e717db54550b4676f38208092eb654da Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Apr 2009 20:30:39 +0100 Subject: ASoC: Make S3C64xx clock export function to return struct clk This makes the interface usable with the s3c-iis-v2 rate calculator and consistent with S3C2412. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c index 7679f7b..cb11f78 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.c +++ b/sound/soc/s3c24xx/s3c64xx-i2s.c @@ -108,14 +108,13 @@ static int s3c64xx_i2s_set_sysclk(struct snd_soc_dai *cpu_dai, return 0; } - -unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *dai) +struct clk *s3c64xx_i2s_get_clock(struct snd_soc_dai *dai) { struct s3c_i2sv2_info *i2s = to_info(dai); - return clk_get_rate(i2s->iis_cclk); + return i2s->iis_cclk; } -EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clockrate); +EXPORT_SYMBOL_GPL(s3c64xx_i2s_get_clock); static int s3c64xx_i2s_probe(struct platform_device *pdev, struct snd_soc_dai *dai) diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.h b/sound/soc/s3c24xx/s3c64xx-i2s.h index 597822a..02148ce 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.h +++ b/sound/soc/s3c24xx/s3c64xx-i2s.h @@ -15,6 +15,8 @@ #ifndef __SND_SOC_S3C24XX_S3C64XX_I2S_H #define __SND_SOC_S3C24XX_S3C64XX_I2S_H __FILE__ +struct clk; + #include "s3c-i2s-v2.h" #define S3C64XX_DIV_BCLK S3C_I2SV2_DIV_BCLK @@ -26,6 +28,6 @@ extern struct snd_soc_dai s3c64xx_i2s_dai[]; -extern unsigned long s3c64xx_i2s_get_clockrate(struct snd_soc_dai *cpu_dai); +extern struct clk *s3c64xx_i2s_get_clock(struct snd_soc_dai *dai); #endif /* __SND_SOC_S3C24XX_S3C64XX_I2S_H */ -- cgit v0.10.2 From 553b1dd58c5cf1abd6d0965041169400a3cff1ad Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Apr 2009 20:29:25 +0100 Subject: ASoC: Fix data format configuration for S3C64xx IISv2 and add 24 bit The data format configuration for S3C64xx IISv2 is completely different to that for S3C24xx. Instead of a single bit configuration in bit 0 of IISMOD we have format selection in bits 13 and 14 and bit clock rate selection in bits 1 and 2. While we're here add support for 24 bit samples in S3C64xx. At some point it may be desirable to expose the bit clock rate selection to users but given the limited configuration options that may not be required. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index 3b9201c..54f4119 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -280,7 +280,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, */ #define IISMOD_MASTER_MASK (1 << 11) #define IISMOD_SLAVE (1 << 11) -#define IISMOD_MASTER (0x0) +#define IISMOD_MASTER (0 << 11) #endif switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { @@ -341,6 +341,7 @@ static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream, iismod = readl(i2s->regs + S3C2412_IISMOD); pr_debug("%s: r: IISMOD: %x\n", __func__, iismod); +#if defined(CONFIG_CPU_S3C2412) || defined(CONFIG_CPU_S3C2413) switch (params_format(params)) { case SNDRV_PCM_FORMAT_S8: iismod |= S3C2412_IISMOD_8BIT; @@ -349,6 +350,25 @@ static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream, iismod &= ~S3C2412_IISMOD_8BIT; break; } +#endif + +#ifdef CONFIG_PLAT_S3C64XX + iismod &= ~0x606; + /* Sample size */ + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S8: + /* 8 bit sample, 16fs BCLK */ + iismod |= 0x2004; + break; + case SNDRV_PCM_FORMAT_S16_LE: + /* 16 bit sample, 32fs BCLK */ + break; + case SNDRV_PCM_FORMAT_S24_LE: + /* 24 bit sample, 48fs BCLK */ + iismod |= 0x4002; + break; + } +#endif writel(iismod, i2s->regs + S3C2412_IISMOD); pr_debug("%s: w: IISMOD: %x\n", __func__, iismod); diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c index cb11f78..e0f4a16 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.c +++ b/sound/soc/s3c24xx/s3c64xx-i2s.c @@ -146,7 +146,8 @@ static int s3c64xx_i2s_probe(struct platform_device *pdev, SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | SNDRV_PCM_RATE_96000) #define S3C64XX_I2S_FMTS \ - (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE) + (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE |\ + SNDRV_PCM_FMTBIT_S24_LE) static struct snd_soc_dai_ops s3c64xx_i2s_dai_ops = { .set_sysclk = s3c64xx_i2s_set_sysclk, -- cgit v0.10.2 From 07736d48051869c37838635b41850618aa63b9a7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Apr 2009 13:13:14 +0100 Subject: ASoC: Fix boot warnings from S3C IISv2 On startup we try to make sure that the port is quiesced but if the port is already stopped then this will generate a warning about the RX/TX mode configuration. Configure the mode before doing the teardown to suppress these warnings. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index 54f4119..34142c8 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -573,6 +573,7 @@ int s3c_i2sv2_probe(struct platform_device *pdev, unsigned long base) { struct device *dev = &pdev->dev; + unsigned int iismod; i2s->dev = dev; @@ -594,12 +595,16 @@ int s3c_i2sv2_probe(struct platform_device *pdev, clk_enable(i2s->iis_pclk); + /* Mark ourselves as in TXRX mode so we can run through our cleanup + * process without warnings. */ + iismod = readl(i2s->regs + S3C2412_IISMOD); + iismod |= S3C2412_IISMOD_MODE_TXRX; + writel(iismod, i2s->regs + S3C2412_IISMOD); s3c2412_snd_txctrl(i2s, 0); s3c2412_snd_rxctrl(i2s, 0); return 0; } - EXPORT_SYMBOL_GPL(s3c_i2sv2_probe); #ifdef CONFIG_PM -- cgit v0.10.2 From c86bde54062a4d02c1b58203b7802797e4007a8a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Apr 2009 13:09:33 +0100 Subject: ASoC: Allow use of resource from the platform device for S3C IISv2 Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index 34142c8..cb85498 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -580,6 +580,24 @@ int s3c_i2sv2_probe(struct platform_device *pdev, /* record our i2s structure for later use in the callbacks */ dai->private_data = i2s; + if (!base) { + struct resource *res = platform_get_resource(pdev, + IORESOURCE_MEM, + 0); + if (!res) { + dev_err(dev, "Unable to get register resource\n"); + return -ENXIO; + } + + if (!request_mem_region(res->start, resource_size(res), + "s3c64xx-i2s-v4")) { + dev_err(dev, "Unable to request register region\n"); + return -EBUSY; + } + + base = res->start; + } + i2s->regs = ioremap(base, 0x100); if (i2s->regs == NULL) { dev_err(dev, "cannot ioremap registers\n"); -- cgit v0.10.2 From af3ea7bdc77be000f69a41e7c41060f72b5a7111 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Apr 2009 13:13:55 +0100 Subject: ASoC: Display the clock rate used as the basis for rate calculation Aids debugging. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index cb85498..ad690b2 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -523,6 +523,8 @@ int s3c_i2sv2_iis_calc_rate(struct s3c_i2sv2_rate_calc *info, unsigned int best_rate = 0; unsigned int best_deviation = INT_MAX; + pr_debug("Input clock rate %ldHz\n", clkrate); + if (fstab == NULL) fstab = iis_fs_tab; -- cgit v0.10.2 From 38e43c81a07de8ee8a757a9c93dd3a4937dd35e0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Apr 2009 13:14:38 +0100 Subject: ASoC: Display S3C IISv2 mode and MS errors by default Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index ad690b2..bc4e504 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -295,7 +295,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, iismod |= IISMOD_MASTER; break; default: - pr_debug("unknwon master/slave format\n"); + pr_err("unknwon master/slave format\n"); return -EINVAL; } @@ -312,7 +312,7 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, iismod |= S3C2412_IISMOD_SDF_IIS; break; default: - pr_debug("Unknown data format\n"); + pr_err("Unknown data format\n"); return -EINVAL; } -- cgit v0.10.2 From abbc82466967064e4eaafa367fc225a8c803569c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Apr 2009 13:21:52 +0100 Subject: ASoC: Staticise txctrl and rxctrl for S3C IISv2 They aren't used by anything external and aren't prototyped; if any users appear they can be exported again for them. Also report what modes we have a problem with when we encounter invalid mode configurations. Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c-i2s-v2.c b/sound/soc/s3c24xx/s3c-i2s-v2.c index bc4e504..972c276 100644 --- a/sound/soc/s3c24xx/s3c-i2s-v2.c +++ b/sound/soc/s3c24xx/s3c-i2s-v2.c @@ -89,7 +89,7 @@ static inline void dbg_showcon(const char *fn, u32 con) /* Turn on or off the transmission path. */ -void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) +static void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) { void __iomem *regs = i2s->regs; u32 fic, con, mod; @@ -119,7 +119,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "TXEN: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "TXEN: Invalid MODE %x in IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); + break; } writel(con, regs + S3C2412_IISCON); @@ -146,7 +148,9 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "TXDIS: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "TXDIS: Invalid MODE %x in IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); + break; } writel(mod, regs + S3C2412_IISMOD); @@ -157,9 +161,8 @@ void s3c2412_snd_txctrl(struct s3c_i2sv2_info *i2s, int on) dbg_showcon(__func__, con); pr_debug("%s: IIS: CON=%x MOD=%x FIC=%x\n", __func__, con, mod, fic); } -EXPORT_SYMBOL_GPL(s3c2412_snd_txctrl); -void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) +static void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) { void __iomem *regs = i2s->regs; u32 fic, con, mod; @@ -189,7 +192,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "RXEN: Invalid MODE %x in IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); } writel(mod, regs + S3C2412_IISMOD); @@ -213,7 +217,8 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) break; default: - dev_err(i2s->dev, "RXEN: Invalid MODE in IISMOD\n"); + dev_err(i2s->dev, "RXDIS: Invalid MODE %x in IISMOD\n", + mod & S3C2412_IISMOD_MODE_MASK); } writel(con, regs + S3C2412_IISCON); @@ -223,7 +228,6 @@ void s3c2412_snd_rxctrl(struct s3c_i2sv2_info *i2s, int on) fic = readl(regs + S3C2412_IISFIC); pr_debug("%s: IIS: CON=%x MOD=%x FIC=%x\n", __func__, con, mod, fic); } -EXPORT_SYMBOL_GPL(s3c2412_snd_rxctrl); /* * Wait for the LR signal to allow synchronisation to the L/R clock -- cgit v0.10.2 From 71437552f2564c0d0c5cc4995045683051c5fe62 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 30 Apr 2009 13:42:04 +0100 Subject: ASoC: Use platform device resource for S3C64xx IISv2 Signed-off-by: Mark Brown diff --git a/sound/soc/s3c24xx/s3c64xx-i2s.c b/sound/soc/s3c24xx/s3c64xx-i2s.c index e0f4a16..3c06c40 100644 --- a/sound/soc/s3c24xx/s3c64xx-i2s.c +++ b/sound/soc/s3c24xx/s3c64xx-i2s.c @@ -220,8 +220,7 @@ static __devinit int s3c64xx_iis_dev_probe(struct platform_device *pdev) goto err; } - ret = s3c_i2sv2_probe(pdev, dai, i2s, - dai->id ? S3C64XX_PA_IIS1 : S3C64XX_PA_IIS0); + ret = s3c_i2sv2_probe(pdev, dai, i2s, 0); if (ret) goto err_clk; -- cgit v0.10.2 From 03682411b1ccd38cbde2e9a6ab43884ff34fbefc Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 30 Apr 2009 18:38:01 +0200 Subject: alim15x3: Remove historical hacks, re-enable init_hwif for PowerPC Some time ago we had to disable init_hwif callback for PowerPC builds. That was because of a historical IRQ overwrite in the driver, which was causing IDE malfunction on the MPC8610HPCD PowerPC boards. It's unclear whether this overwrite is still useful, but it is proven to cause a bit of harm, and today some PowerPC targets (Xilinx ML510, as reported by Roderick Colenbrander) need the init_hwif, so we have to re-enable it and remove the overwrite. Reported-by: Roderick Colenbrander Suggested-by: Bartlomiej Zolnierkiewicz Cc: Benjamin Herrenschmidt Signed-off-by: Anton Vorontsov Signed-off-by: Bartlomiej Zolnierkiewicz diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 537da1c..e59b6de 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -402,27 +402,23 @@ static u8 ali_cable_detect(ide_hwif_t *hwif) return cbl; } -#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) +#ifndef CONFIG_SPARC64 /** * init_hwif_ali15x3 - Initialize the ALI IDE x86 stuff * @hwif: interface to configure * * Obtain the IRQ tables for an ALi based IDE solution on the PC * class platforms. This part of the code isn't applicable to the - * Sparc and PowerPC systems. + * Sparc systems. */ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif) { - struct pci_dev *dev = to_pci_dev(hwif->dev); u8 ideic, inmir; s8 irq_routing_table[] = { -1, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; int irq = -1; - if (dev->device == PCI_DEVICE_ID_AL_M5229) - hwif->irq = hwif->channel ? 15 : 14; - if (isa_dev) { /* * read IDE interface control @@ -455,7 +451,7 @@ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif) } #else #define init_hwif_ali15x3 NULL -#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */ +#endif /* CONFIG_SPARC64 */ /** * init_dma_ali15x3 - set up DMA on ALi15x3 -- cgit v0.10.2 From 30b4ae8a4498543863501f707879b7220b649602 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:01 +0000 Subject: signals: split do_tkill Split out the code from do_tkill to make it reusable by the follow up patch which implements sys_rt_tgsigqueueinfo Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov diff --git a/kernel/signal.c b/kernel/signal.c index d803473..56d27ac 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2278,24 +2278,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) return kill_something_info(sig, &info, pid); } -static int do_tkill(pid_t tgid, pid_t pid, int sig) +static int +do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) { - int error; - struct siginfo info; struct task_struct *p; unsigned long flags; - - error = -ESRCH; - info.si_signo = sig; - info.si_errno = 0; - info.si_code = SI_TKILL; - info.si_pid = task_tgid_vnr(current); - info.si_uid = current_uid(); + int error = -ESRCH; rcu_read_lock(); p = find_task_by_vpid(pid); if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { - error = check_kill_permission(sig, &info, p); + error = check_kill_permission(sig, info, p); /* * The null signal is a permissions and process existence * probe. No signal is actually delivered. @@ -2305,7 +2298,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) * signal is private anyway. */ if (!error && sig && lock_task_sighand(p, &flags)) { - error = specific_send_sig_info(sig, &info, p); + error = specific_send_sig_info(sig, info, p); unlock_task_sighand(p, &flags); } } @@ -2314,6 +2307,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) return error; } +static int do_tkill(pid_t tgid, pid_t pid, int sig) +{ + struct siginfo info; + + info.si_signo = sig; + info.si_errno = 0; + info.si_code = SI_TKILL; + info.si_pid = task_tgid_vnr(current); + info.si_uid = current_uid(); + + return do_send_specific(tgid, pid, sig, &info); +} + /** * sys_tgkill - send signal to one specific thread * @tgid: the thread group ID of the thread -- cgit v0.10.2 From 62ab4505e3efaf67784f84059e0fb9cedb1728ea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:06 +0000 Subject: signals: implement sys_rt_tgsigqueueinfo sys_kill has the per thread counterpart sys_tgkill. sigqueueinfo is missing a thread directed counterpart. Such an interface is important for migrating applications from other OSes which have the per thread delivery implemented. Signed-off-by: Thomas Gleixner Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath Acked-by: Ulrich Drepper diff --git a/include/linux/compat.h b/include/linux/compat.h index f2ded21..af931ee 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from); int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); int get_compat_sigevent(struct sigevent *event, const struct compat_sigevent __user *u_event); +long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo); static inline int compat_timeval_compare(struct compat_timeval *lhs, struct compat_timeval *rhs) diff --git a/include/linux/signal.h b/include/linux/signal.h index 84f997f..c755283 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig) extern int next_signal(struct sigpending *pending, sigset_t *mask); extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); +extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t *info); extern long do_sigpending(void __user *, unsigned long); extern int sigprocmask(int, sigset_t *, sigset_t *); extern int show_unhandled_signals; diff --git a/kernel/compat.c b/kernel/compat.c index 42d5654..f6c204f 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese, } +asmlinkage long +compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, + struct compat_siginfo __user *uinfo) +{ + siginfo_t info; + + if (copy_siginfo_from_user32(&info, uinfo)) + return -EFAULT; + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + #ifdef __ARCH_WANT_COMPAT_SYS_TIME /* compat_time_t is a 32 bit "long" and needs to get converted. */ diff --git a/kernel/signal.c b/kernel/signal.c index 56d27ac..f79b3b9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2369,6 +2369,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, return kill_proc_info(sig, &info, pid); } +long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) +{ + /* This is only valid for single tasks */ + if (pid <= 0 || tgid <= 0) + return -EINVAL; + + /* Not even root can pretend to send signals from the kernel. + Nor can they impersonate a kill(), which adds source info. */ + if (info->si_code >= 0) + return -EPERM; + info->si_signo = sig; + + return do_send_specific(tgid, pid, sig, info); +} + +SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) +{ + siginfo_t info; + + if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) + return -EFAULT; + + return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); +} + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) { struct task_struct *t = current; -- cgit v0.10.2 From 12d161147f828192b5bcc33166f468a827832767 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 4 Apr 2009 21:01:10 +0000 Subject: x86: hookup sys_rt_tgsigqueueinfo Make the new sys_rt_tgsigqueueinfo available for x86. Signed-off-by: Thomas Gleixner diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a505202..dcef387 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -830,4 +830,5 @@ ia32_sys_call_table: .quad sys_inotify_init1 .quad compat_sys_preadv .quad compat_sys_pwritev + .quad compat_sys_rt_tgsigqueueinfo /* 335 */ ia32_syscall_end: diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74..708dae6 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -340,6 +340,7 @@ #define __NR_inotify_init1 332 #define __NR_preadv 333 #define __NR_pwritev 334 +#define __NR_rt_tgsigqueueinfo 335 #ifdef __KERNEL__ diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f818294..4e2b054 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) __SYSCALL(__NR_preadv, sys_preadv) #define __NR_pwritev 296 __SYSCALL(__NR_pwritev, sys_pwritev) +#define __NR_rt_tgsigqueueinfo 297 +__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) #ifndef __NO_STUBS diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ff5c873..734f92c 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -334,3 +334,4 @@ ENTRY(sys_call_table) .long sys_inotify_init1 .long sys_preadv .long sys_pwritev + .long sys_rt_tgsigqueueinfo /* 335 */ -- cgit v0.10.2 From bf293c17b26b8854241df08b9b63f7270cbde012 Mon Sep 17 00:00:00 2001 From: Remis Lima Baima Date: Thu, 30 Apr 2009 18:36:23 +0200 Subject: x86: added 'ifndef _ASM_X86_IOMAP_H' to iomap.h iomap.h misses the include guards. [ Impact: cleanup ] Signed-off-by: Remis Lima Baima Signed-off-by: Arnd Bergmann LKML-Reference: <200904301836.23885.arnd@arndb.de> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 86af260..0e9fe1d 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -1,3 +1,6 @@ +#ifndef _ASM_X86_IOMAP_H +#define _ASM_X86_IOMAP_H + /* * Copyright © 2008 Ingo Molnar * @@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); + +#endif /* _ASM_X86_IOMAP_H */ -- cgit v0.10.2 From e74cc06df3b05e2b2c1611a043f6e6dcadaab1eb Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Wed, 28 Jan 2009 19:18:32 +0100 Subject: configfs: Silence lockdep on mkdir() and rmdir() When attaching default groups (subdirs) of a new group (in mkdir() or in configfs_register()), configfs recursively takes inode's mutexes along the path from the parent of the new group to the default subdirs. This is needed to ensure that the VFS will not race with operations on these sub-dirs. This is safe for the following reasons: - the VFS allows one to lock first an inode and second one of its children (The lock subclasses for this pattern are respectively I_MUTEX_PARENT and I_MUTEX_CHILD); - from this rule any inode path can be recursively locked in descending order as long as it stays under a single mountpoint and does not follow symlinks. Unfortunately lockdep does not know (yet?) how to handle such recursion. I've tried to use Peter Zijlstra's lock_set_subclass() helper to upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know that we might recursively lock some of their descendant, but this usage does not seem to fit the purpose of lock_set_subclass() because it leads to several i_mutex locked with subclass I_MUTEX_PARENT by the same task. >From inside configfs it is not possible to serialize those recursive locking with a top-level one, because mkdir() and rmdir() are already called with inodes locked by the VFS. So using some mutex_lock_nest_lock() is not an option. I am proposing two solutions: 1) one that wraps recursive mutex_lock()s with lockdep_off()/lockdep_on(). 2) (as suggested earlier by Peter Zijlstra) one that puts the i_mutexes recursively locked in different classes based on their depth from the top-level config_group created. This induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the nesting of configfs default groups whenever lockdep is activated but this limit looks reasonably high. Unfortunately, this also isolates VFS operations on configfs default groups from the others and thus lowers the chances to detect locking issues. Nobody likes solution 1), which I can understand. This patch implements solution 2). However lockdep is still not happy with configfs_depend_item(). Next patch reworks the locking of configfs_depend_item() and finally makes lockdep happy. [ Note: This hides a few locking interactions with the VFS from lockdep. That was my big concern, because we like lockdep's protection. However, the current state always dumps a spurious warning. The locking is correct, so I tell people to ignore the warning and that we'll keep our eyes on the locking to make sure it stays correct. With this patch, we eliminate the warning. We do lose some of the lockdep protections, but this only means that we still have to keep our eyes on the locking. We're going to do that anyway. -- Joel ] Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index 762d287..da6061a 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -39,6 +39,9 @@ struct configfs_dirent { umode_t s_mode; struct dentry * s_dentry; struct iattr * s_iattr; +#ifdef CONFIG_LOCKDEP + int s_depth; +#endif }; #define CONFIGFS_ROOT 0x0001 diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 05373db..d4d871f 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -78,6 +78,92 @@ static const struct dentry_operations configfs_dentry_ops = { .d_delete = configfs_d_delete, }; +#ifdef CONFIG_LOCKDEP + +/* + * Helpers to make lockdep happy with our recursive locking of default groups' + * inodes (see configfs_attach_group() and configfs_detach_group()). + * We put default groups i_mutexes in separate classes according to their depth + * from the youngest non-default group ancestor. + * + * For a non-default group A having default groups A/B, A/C, and A/C/D, default + * groups A/B and A/C will have their inode's mutex in class + * default_group_class[0], and default group A/C/D will be in + * default_group_class[1]. + * + * The lock classes are declared and assigned in inode.c, according to the + * s_depth value. + * The s_depth value is initialized to -1, adjusted to >= 0 when attaching + * default groups, and reset to -1 when all default groups are attached. During + * attachment, if configfs_create() sees s_depth > 0, the lock class of the new + * inode's mutex is set to default_group_class[s_depth - 1]. + */ + +static void configfs_init_dirent_depth(struct configfs_dirent *sd) +{ + sd->s_depth = -1; +} + +static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, + struct configfs_dirent *sd) +{ + int parent_depth = parent_sd->s_depth; + + if (parent_depth >= 0) + sd->s_depth = parent_depth + 1; +} + +static void +configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) +{ + /* + * item's i_mutex class is already setup, so s_depth is now only + * used to set new sub-directories s_depth, which is always done + * with item's i_mutex locked. + */ + /* + * sd->s_depth == -1 iff we are a non default group. + * else (we are a default group) sd->s_depth > 0 (see + * create_dir()). + */ + if (sd->s_depth == -1) + /* + * We are a non default group and we are going to create + * default groups. + */ + sd->s_depth = 0; +} + +static void +configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) +{ + /* We will not create default groups anymore. */ + sd->s_depth = -1; +} + +#else /* CONFIG_LOCKDEP */ + +static void configfs_init_dirent_depth(struct configfs_dirent *sd) +{ +} + +static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd, + struct configfs_dirent *sd) +{ +} + +static void +configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd) +{ +} + +static void +configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) +{ +} + +#endif /* CONFIG_LOCKDEP */ + /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ @@ -94,6 +180,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare INIT_LIST_HEAD(&sd->s_links); INIT_LIST_HEAD(&sd->s_children); sd->s_element = element; + configfs_init_dirent_depth(sd); spin_lock(&configfs_dirent_lock); if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { spin_unlock(&configfs_dirent_lock); @@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p, error = configfs_make_dirent(p->d_fsdata, d, k, mode, CONFIGFS_DIR | CONFIGFS_USET_CREATING); if (!error) { + configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata); error = configfs_create(d, mode, init_dir); if (!error) { inc_nlink(p->d_inode); @@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item, * error, as rmdir() would. */ mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + configfs_adjust_dir_dirent_depth_before_populate(sd); ret = populate_groups(to_config_group(item)); if (ret) { configfs_detach_item(item); dentry->d_inode->i_flags |= S_DEAD; } + configfs_adjust_dir_dirent_depth_after_populate(sd); mutex_unlock(&dentry->d_inode->i_mutex); if (ret) d_delete(dentry); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 5d349d3..4921e742 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -33,10 +33,15 @@ #include #include #include +#include #include #include "configfs_internal.h" +#ifdef CONFIG_LOCKDEP +static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; +#endif + extern struct super_block * configfs_sb; static const struct address_space_operations configfs_aops = { @@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) return inode; } +#ifdef CONFIG_LOCKDEP + +static void configfs_set_inode_lock_class(struct configfs_dirent *sd, + struct inode *inode) +{ + int depth = sd->s_depth; + + if (depth > 0) { + if (depth <= ARRAY_SIZE(default_group_class)) { + lockdep_set_class(&inode->i_mutex, + &default_group_class[depth - 1]); + } else { + /* + * In practice the maximum level of locking depth is + * already reached. Just inform about possible reasons. + */ + printk(KERN_INFO "configfs: Too many levels of inodes" + " for the locking correctness validator.\n"); + printk(KERN_INFO "Spurious warnings may appear.\n"); + } + } +} + +#else /* CONFIG_LOCKDEP */ + +static void configfs_set_inode_lock_class(struct configfs_dirent *sd, + struct inode *inode) +{ +} + +#endif /* CONFIG_LOCKDEP */ + int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) { int error = 0; @@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode * struct inode *p_inode = dentry->d_parent->d_inode; p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; } + configfs_set_inode_lock_class(sd, inode); goto Proceed; } else -- cgit v0.10.2 From 420118caa32c8ccdf9fce5a623b9de3f951573c5 Mon Sep 17 00:00:00 2001 From: Louis Rilling Date: Wed, 28 Jan 2009 19:18:33 +0100 Subject: configfs: Rework configfs_depend_item() locking and make lockdep happy configfs_depend_item() recursively locks all inodes mutex from configfs root to the target item, which makes lockdep unhappy. The purpose of this recursive locking is to ensure that the item tree can be safely parsed and that the target item, if found, is not about to leave. This patch reworks configfs_depend_item() locking using configfs_dirent_lock. Since configfs_dirent_lock protects all changes to the configfs_dirent tree, and protects tagging of items to be removed, this lock can be used instead of the inodes mutex lock chain. This needs that the check for dependents be done atomically with CONFIGFS_USET_DROPPING tagging. Now lockdep looks happy with configfs. [ Lifted the setting of s_type into configfs_new_dirent() to satisfy the atomic setting of CONFIGFS_USET_CREATING -- Joel ] Signed-off-by: Louis Rilling Signed-off-by: Joel Becker diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index d4d871f..8e48b52 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -167,8 +167,8 @@ configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ -static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd, - void * element) +static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, + void *element, int type) { struct configfs_dirent * sd; @@ -180,6 +180,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare INIT_LIST_HEAD(&sd->s_links); INIT_LIST_HEAD(&sd->s_children); sd->s_element = element; + sd->s_type = type; configfs_init_dirent_depth(sd); spin_lock(&configfs_dirent_lock); if (parent_sd->s_type & CONFIGFS_USET_DROPPING) { @@ -225,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd, { struct configfs_dirent * sd; - sd = configfs_new_dirent(parent_sd, element); + sd = configfs_new_dirent(parent_sd, element, type); if (IS_ERR(sd)) return PTR_ERR(sd); sd->s_mode = mode; - sd->s_type = type; sd->s_dentry = dentry; if (dentry) { dentry->d_fsdata = configfs_get(sd); @@ -1006,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level) * Note, btw, that this can be called at *any* time, even when a configfs * subsystem isn't registered, or when configfs is loading or unloading. * Just like configfs_register_subsystem(). So we take the same - * precautions. We pin the filesystem. We lock each i_mutex _in_order_ - * on our way down the tree. If we can find the target item in the + * precautions. We pin the filesystem. We lock configfs_dirent_lock. + * If we can find the target item in the * configfs tree, it must be part of the subsystem tree as well, so we - * do not need the subsystem semaphore. Holding the i_mutex chain locks - * out mkdir() and rmdir(), who might be racing us. + * do not need the subsystem semaphore. Holding configfs_dirent_lock helps + * locking out mkdir() and rmdir(), who might be racing us. */ /* @@ -1023,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level) * do that so we can unlock it if we find nothing. * * Here we do a depth-first search of the dentry hierarchy looking for - * our object. We take i_mutex on each step of the way down. IT IS - * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, - * we'll drop the i_mutex. + * our object. + * We deliberately ignore items tagged as dropping since they are virtually + * dead, as well as items in the middle of attachment since they virtually + * do not exist yet. This completes the locking out of racing mkdir() and + * rmdir(). + * Note: subdirectories in the middle of attachment start with s_type = + * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When + * CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of + * s_type is in configfs_new_dirent(), which has configfs_dirent_lock. * - * If the target is not found, -ENOENT is bubbled up and we have released - * all locks. If the target was found, the locks will be cleared by - * configfs_depend_rollback(). + * If the target is not found, -ENOENT is bubbled up. * * This adds a requirement that all config_items be unique! * - * This is recursive because the locking traversal is tricky. There isn't + * This is recursive. There isn't * much on the stack, though, so folks that need this function - be careful * about your stack! Patches will be accepted to make it iterative. */ @@ -1045,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin, BUG_ON(!origin || !sd); - /* Lock this guy on the way down */ - mutex_lock(&sd->s_dentry->d_inode->i_mutex); if (sd->s_element == target) /* Boo-yah */ goto out; list_for_each_entry(child_sd, &sd->s_children, s_sibling) { - if (child_sd->s_type & CONFIGFS_DIR) { + if ((child_sd->s_type & CONFIGFS_DIR) && + !(child_sd->s_type & CONFIGFS_USET_DROPPING) && + !(child_sd->s_type & CONFIGFS_USET_CREATING)) { ret = configfs_depend_prep(child_sd->s_dentry, target); if (!ret) @@ -1060,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin, } /* We looped all our children and didn't find target */ - mutex_unlock(&sd->s_dentry->d_inode->i_mutex); ret = -ENOENT; out: return ret; } -/* - * This is ONLY called if configfs_depend_prep() did its job. So we can - * trust the entire path from item back up to origin. - * - * We walk backwards from item, unlocking each i_mutex. We finish by - * unlocking origin. - */ -static void configfs_depend_rollback(struct dentry *origin, - struct config_item *item) -{ - struct dentry *dentry = item->ci_dentry; - - while (dentry != origin) { - mutex_unlock(&dentry->d_inode->i_mutex); - dentry = dentry->d_parent; - } - - mutex_unlock(&origin->d_inode->i_mutex); -} - int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target) { @@ -1127,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys, /* Ok, now we can trust subsys/s_item */ - /* Scan the tree, locking i_mutex recursively, return 0 if found */ + spin_lock(&configfs_dirent_lock); + /* Scan the tree, return 0 if found */ ret = configfs_depend_prep(subsys_sd->s_dentry, target); if (ret) - goto out_unlock_fs; + goto out_unlock_dirent_lock; - /* We hold all i_mutexes from the subsystem down to the target */ + /* + * We are sure that the item is not about to be removed by rmdir(), and + * not in the middle of attachment by mkdir(). + */ p = target->ci_dentry->d_fsdata; p->s_dependent_count += 1; - configfs_depend_rollback(subsys_sd->s_dentry, target); - +out_unlock_dirent_lock: + spin_unlock(&configfs_dirent_lock); out_unlock_fs: mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); @@ -1162,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, struct configfs_dirent *sd; /* - * Since we can trust everything is pinned, we just need i_mutex - * on the item. + * Since we can trust everything is pinned, we just need + * configfs_dirent_lock. */ - mutex_lock(&target->ci_dentry->d_inode->i_mutex); + spin_lock(&configfs_dirent_lock); sd = target->ci_dentry->d_fsdata; BUG_ON(sd->s_dependent_count < 1); @@ -1176,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys, * After this unlock, we cannot trust the item to stay alive! * DO NOT REFERENCE item after this unlock. */ - mutex_unlock(&target->ci_dentry->d_inode->i_mutex); + spin_unlock(&configfs_dirent_lock); } EXPORT_SYMBOL(configfs_undepend_item); @@ -1376,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (sd->s_type & CONFIGFS_USET_DEFAULT) return -EPERM; - /* - * Here's where we check for dependents. We're protected by - * i_mutex. - */ - if (sd->s_dependent_count) - return -EBUSY; - /* Get a working ref until we have the child */ parent_item = configfs_get_config_item(dentry->d_parent); subsys = to_config_group(parent_item)->cg_subsys; @@ -1406,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); - ret = configfs_detach_prep(dentry, &wait_mutex); - if (ret) - configfs_detach_rollback(dentry); + /* + * Here's where we check for dependents. We're protected by + * configfs_dirent_lock. + * If no dependent, atomically tag the item as dropping. + */ + ret = sd->s_dependent_count ? -EBUSY : 0; + if (!ret) { + ret = configfs_detach_prep(dentry, &wait_mutex); + if (ret) + configfs_detach_rollback(dentry); + } spin_unlock(&configfs_dirent_lock); mutex_unlock(&configfs_symlink_mutex); @@ -1519,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { - file->private_data = configfs_new_dirent(parent_sd, NULL); + file->private_data = configfs_new_dirent(parent_sd, NULL, 0); if (IS_ERR(file->private_data)) err = PTR_ERR(file->private_data); else -- cgit v0.10.2 From 78a3d9d5654a7fd99cf8b2ab06b9497b9c7aad64 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 29 Apr 2009 18:01:23 +0200 Subject: do_wait: do take security_task_wait() into account I was never able to understand what should we actually do when security_task_wait() fails, but the current code doesn't look right. If ->task_wait() returns the error, we update *notask_error correctly. But then we either reap the child (despite the fact this was forbidden) or clear *notask_error (and hide the securiy policy problems). This patch assumes that "stolen by ptrace" doesn't matter. If selinux denies the child we should ignore it but make sure we report -EACCESS instead of -ECHLD if there are no other eligible children. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Signed-off-by: James Morris diff --git a/kernel/exit.c b/kernel/exit.c index 167e1e3..d2e8239 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1582,6 +1582,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, */ if (*notask_error) *notask_error = ret; + return 0; } if (likely(!ptrace) && unlikely(p->ptrace)) { -- cgit v0.10.2 From d6662c351a827264706bf880a36cdd376808ba1c Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 12 Dec 2008 00:24:40 +0000 Subject: [ARM] SMDK6410: Ensure LCD settings are setup Ensure that the LCD output type is RGB and that the modem interface is not bypassing the LCD block. This ensures the LCD interface output gets to the pins in the correct format. Signed-off-by: Ben Dooks diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c index 7f473e4..d0b9f09 100644 --- a/arch/arm/mach-s3c6410/mach-smdk6410.c +++ b/arch/arm/mach-s3c6410/mach-smdk6410.c @@ -39,6 +39,9 @@ #include #include +#include +#include +#include #include #include @@ -155,9 +158,23 @@ static struct i2c_board_info i2c_devs1[] __initdata = { static void __init smdk6410_map_io(void) { + u32 tmp; + s3c64xx_init_io(smdk6410_iodesc, ARRAY_SIZE(smdk6410_iodesc)); s3c24xx_init_clocks(12000000); s3c24xx_init_uarts(smdk6410_uartcfgs, ARRAY_SIZE(smdk6410_uartcfgs)); + + /* set the LCD type */ + + tmp = __raw_readl(S3C64XX_SPCON); + tmp &= ~S3C64XX_SPCON_LCD_SEL_MASK; + tmp |= S3C64XX_SPCON_LCD_SEL_RGB; + __raw_writel(tmp, S3C64XX_SPCON); + + /* remove the lcd bypass */ + tmp = __raw_readl(S3C64XX_MODEM_MIFPCON); + tmp &= ~MIFPCON_LCD_BYPASS; + __raw_writel(tmp, S3C64XX_MODEM_MIFPCON); } static void __init smdk6410_machine_init(void) -- cgit v0.10.2 From 3056ea0afba83d19af5f1f3daf6ed7211f0717da Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Jan 2009 16:18:01 +0000 Subject: [ARM] SMDK6410: Add support for SMSC9115 ethernet controller Signed-off-by: Mark Brown Signed-off-by: Ben Dooks diff --git a/arch/arm/mach-s3c6410/mach-smdk6410.c b/arch/arm/mach-s3c6410/mach-smdk6410.c index d0b9f09..dfd84d0 100644 --- a/arch/arm/mach-s3c6410/mach-smdk6410.c +++ b/arch/arm/mach-s3c6410/mach-smdk6410.c @@ -24,6 +24,7 @@ #include #include #include +#include #include